diff --git a/docs/source/extensions/numbadoc.py b/docs/source/extensions/numbadoc.py index 5b4202a1..06de3418 100644 --- a/docs/source/extensions/numbadoc.py +++ b/docs/source/extensions/numbadoc.py @@ -27,7 +27,7 @@ def import_object(self) -> bool: """ success = super().import_object() if success: - # Store away numba wrapper + # store away numba wrapper self.jitobj = self.object # And bend references to underlying python function if hasattr(self.object, "py_func"): diff --git a/docs/source/notebooks/DataCompression.ipynb b/docs/source/notebooks/DataCompression.ipynb index fad9c9bc..74a26c92 100644 --- a/docs/source/notebooks/DataCompression.ipynb +++ b/docs/source/notebooks/DataCompression.ipynb @@ -61,8 +61,8 @@ "metadata": {}, "outputs": [], "source": [ - "store = lgdo.LH5Store()\n", - "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", + "store = lgdo.lh5.LH5Store()\n", + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", "lgdo.show(\"data.lh5\")" ] }, @@ -110,7 +110,7 @@ "metadata": {}, "outputs": [], "source": [ - "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS" + "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS" ] }, { @@ -131,18 +131,18 @@ "outputs": [], "source": [ "# use another built-in filter\n", - "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n", + "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n", "\n", "# specify filter name and options\n", - "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n", + "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n", "\n", "# specify a registered filter provided by hdf5plugin\n", "import hdf5plugin\n", "\n", - "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n", + "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n", "\n", "# shuffle bytes before compressing (typically better compression ratio with no performance penalty)\n", - "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}" + "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}" ] }, { @@ -166,7 +166,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", "show_h5ds_opts(\"data/col1\")" ] }, @@ -175,7 +175,7 @@ "id": "f597a9e2", "metadata": {}, "source": [ - "Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write_object()`. They will be forwarded as is, overriding default settings." + "Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write()`. They will be forwarded as is, overriding default settings." 
] }, { @@ -185,9 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.write_object(\n", - " data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\"\n", - ")\n", + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\")\n", "show_h5ds_opts(\"data/col1\")" ] }, @@ -207,7 +205,7 @@ "outputs": [], "source": [ "data[\"col2\"].attrs[\"hdf5_settings\"] = {\"compression\": \"gzip\"}\n", - "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", "\n", "show_h5ds_opts(\"data/col1\")\n", "show_h5ds_opts(\"data/col2\")" @@ -221,7 +219,7 @@ "We are now storing table columns with different compression settings.\n", "\n", "
\n", - "**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write_object()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n", + "**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n", "
" ] }, @@ -232,7 +230,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)" + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)" ] }, { @@ -257,7 +255,7 @@ "from legendtestdata import LegendTestData\n", "\n", "ldata = LegendTestData()\n", - "wfs, n_rows = store.read_object(\n", + "wfs, n_rows = store.read(\n", " \"geds/raw/waveform\",\n", " ldata.get_path(\"lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5\"),\n", ")\n", @@ -347,7 +345,7 @@ " t0=wfs.t0,\n", " dt=wfs.dt,\n", ")\n", - "store.write_object(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n", + "store.write(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n", "lgdo.show(\"data.lh5\", attrs=True)" ] }, @@ -372,7 +370,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, _ = store.read_object(\"waveforms\", \"data.lh5\")\n", + "obj, _ = store.read(\"waveforms\", \"data.lh5\")\n", "obj.values" ] }, @@ -391,7 +389,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n", + "obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n", "obj.values" ] }, @@ -433,9 +431,9 @@ "from lgdo.compression import ULEB128ZigZagDiff\n", "\n", "wfs.values.attrs[\"compression\"] = ULEB128ZigZagDiff()\n", - "store.write_object(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n", + "store.write(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n", "\n", - "obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n", + "obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n", "obj.values.attrs[\"codec\"]" ] }, @@ -447,8 +445,8 @@ "Further reading:\n", "\n", "- [Available waveform compression algorithms](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.compression.html)\n", - "- [read_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.read_object)\n", - "- [write_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write_object)" + "- [read() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.store.LH5Store.read)\n", + "- [write() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write)" ] } ], diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb index 8563f4bd..9c594be9 100644 --- a/docs/source/notebooks/LH5Files.ipynb +++ b/docs/source/notebooks/LH5Files.ipynb @@ -38,7 +38,7 @@ "id": "c136b537", "metadata": {}, "source": [ - "We can use `lgdo.lh5_store.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.ls) to inspect the file contents:" + "We can use `lgdo.lh5.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.ls) to inspect the file contents:" ] }, { @@ -131,7 +131,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.read_object(\"geds/raw\", lh5_file)" + "store.read(\"geds/raw\", lh5_file)" ] }, { @@ -149,7 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file)\n", + "obj, n_rows = store.read(\"geds/raw/timestamp\", lh5_file)\n", "obj" ] }, @@ -170,7 +170,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n", + "obj, n_rows = store.read(\"geds/raw/timestamp\", 
lh5_file, start_row=15, n_rows=10)\n", "print(obj)" ] }, @@ -189,7 +189,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, n_rows = store.read_object(\n", + "obj, n_rows = store.read(\n", " \"geds/raw\", lh5_file, field_mask=(\"timestamp\", \"energy\"), idx=[1, 3, 7, 9, 10, 15]\n", ")\n", "print(obj)" @@ -200,7 +200,7 @@ "id": "b3f52d77", "metadata": {}, "source": [ - "As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:" + "As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.iterator.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:" ] }, { @@ -260,9 +260,7 @@ "source": [ "store = LH5Store()\n", "\n", - "store.write_object(\n", - " scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\"\n", - ")" + "store.write(scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\")" ] }, { @@ -300,10 +298,8 @@ "metadata": {}, "outputs": [], "source": [ - "store.write_object(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n", - "store.write_object(\n", - " wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\"\n", - ")\n", + "store.write(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n", + "store.write(wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n", "show(\"my_objects.lh5\")" ] }, diff --git a/src/lgdo/__init__.py b/src/lgdo/__init__.py index 5e6eb7e0..25efb08d 100644 --- a/src/lgdo/__init__.py +++ b/src/lgdo/__init__.py @@ -66,11 +66,11 @@ "VectorOfVectors", "VectorOfEncodedVectors", "WaveformTable", - "LH5Iterator", - "LH5Store", "load_dfs", "load_nda", "ls", "show", + "LH5Iterator", + "LH5Store", "__version__", ] diff --git a/src/lgdo/cli.py b/src/lgdo/cli.py index 24ba56d1..2273579a 100644 --- a/src/lgdo/cli.py +++ b/src/lgdo/cli.py @@ -9,7 +9,7 @@ def lh5ls(): - """:func:`.show` command line interface.""" + """:func:`.lh5.show` command line interface.""" parser = argparse.ArgumentParser( prog="lh5ls", description="Inspect LEGEND HDF5 (LH5) file contents" ) diff --git a/src/lgdo/lgdo_utils.py b/src/lgdo/lgdo_utils.py index 05b46bd5..cddd2111 100644 --- a/src/lgdo/lgdo_utils.py +++ b/src/lgdo/lgdo_utils.py @@ -1,149 +1,56 @@ -"""Implements utilities for LEGEND Data Objects.""" from __future__ import annotations -import glob -import logging -import os -import string +from warnings import warn import numpy as np from . import types as lgdo +from .lh5 import utils -log = logging.getLogger(__name__) - -def get_element_type(obj: object) -> str: - """Get the LGDO element type of a scalar or array. - - For use in LGDO datatype attributes. - - Parameters - ---------- - obj - if a ``str``, will automatically return ``string`` if the object has - a :class:`numpy.dtype`, that will be used for determining the element - type otherwise will attempt to case the type of the object to a - :class:`numpy.dtype`. - - Returns - ------- - element_type - A string stating the determined element type of the object. 
- """ - - # special handling for strings - if isinstance(obj, str): - return "string" - - # the rest use dtypes - dt = obj.dtype if hasattr(obj, "dtype") else np.dtype(type(obj)) - kind = dt.kind - - if kind == "b": - return "bool" - if kind == "V": - return "blob" - if kind in ["i", "u", "f"]: - return "real" - if kind == "c": - return "complex" - if kind in ["S", "U"]: - return "string" - - # couldn't figure it out - raise ValueError( - "cannot determine lgdo element_type for object of type", type(obj).__name__ +def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> None: + warn( + "lgdo_utils.copy will soon be removed and will be replaced soon with copy member functions of each LGDO data type.", + DeprecationWarning, + stacklevel=2, ) + return utils.copy(obj, dtype) -def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> lgdo.LGDO: - """Return a copy of an LGDO. - - Parameters - ---------- - obj - the LGDO to be copied. - dtype - NumPy dtype to be used for the copied object. - - """ - if dtype is None: - dtype = obj.dtype - - if isinstance(obj, lgdo.Array): - return lgdo.Array( - np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs) - ) - - if isinstance(obj, lgdo.VectorOfVectors): - return lgdo.VectorOfVectors( - flattened_data=copy(obj.flattened_data, dtype=dtype), - cumulative_length=copy(obj.cumulative_length), - attrs=dict(obj.attrs), - ) - - else: - raise ValueError(f"copy of {type(obj)} not supported") +def get_element_type(obj: object) -> str: + warn( + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import get_element_type' with 'from lgdo.utils import get_element_type'." + "'lgdo.lgdo_utils' will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.get_element_type(obj) def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]]: - """Parse datatype string and return type, dimensions and elements. - - Parameters - ---------- - datatype - a LGDO-formatted datatype string. - - Returns - ------- - element_type - the datatype name dims if not ``None``, a tuple of dimensions for the - LGDO. Note this is not the same as the NumPy shape of the underlying - data object. See the LGDO specification for more information. Also see - :class:`~.types.ArrayOfEqualSizedArrays` and - :meth:`.lh5_store.LH5Store.read_object` for example code elements for - numeric objects, the element type for struct-like objects, the list of - fields in the struct. - """ - if "{" not in datatype: - return "scalar", None, datatype - - # for other datatypes, need to parse the datatype string - from parse import parse - - datatype, element_description = parse("{}{{{}}}", datatype) - if datatype.endswith(">"): - datatype, dims = parse("{}<{}>", datatype) - dims = [int(i) for i in dims.split(",")] - return datatype, tuple(dims), element_description - else: - return datatype, None, element_description.split(",") + warn( + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import parse_datatype' with 'from lgdo.utils import parse_datatype'." + "'lgdo.lgdo_utils' will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.parse_datatype(datatype) def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: - """Expand (environment) variables. 
- - Note - ---- - Malformed variable names and references to non-existing variables are left - unchanged. - - Parameters - ---------- - expr - string expression, which may include (environment) variables prefixed by - ``$``. - substitute - use this dictionary to substitute variables. Takes precedence over - environment variables. - """ - if substitute is None: - substitute = {} - - # use provided mapping - # then expand env variables - return os.path.expandvars(string.Template(expr).safe_substitute(substitute)) + warn( + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import expand_vars' with 'from lgdo.utils import expand_vars'." + "'lgdo.lgdo_utils' will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.expand_vars(expr, substitute) def expand_path( @@ -152,45 +59,12 @@ def expand_path( list: bool = False, base_path: str = None, ) -> str | list: - """Expand (environment) variables and wildcards to return absolute paths. - - Parameters - ---------- - path - name of path, which may include environment variables and wildcards. - list - if ``True``, return a list. If ``False``, return a string; if ``False`` - and a unique file is not found, raise an exception. - substitute - use this dictionary to substitute variables. Environment variables take - precedence. - base_path - name of base path. Returned paths will be relative to base. - - Returns - ------- - path or list of paths - Unique absolute path, or list of all absolute paths - """ - if base_path is not None and base_path != "": - base_path = os.path.expanduser(os.path.expandvars(base_path)) - path = os.path.join(base_path, path) - - # first expand variables - _path = expand_vars(path, substitute) - - # then expand wildcards - paths = sorted(glob.glob(os.path.expanduser(_path))) - - if base_path is not None and base_path != "": - paths = [os.path.relpath(p, base_path) for p in paths] - - if not list: - if len(paths) == 0: - raise FileNotFoundError(f"could not find path matching {path}") - elif len(paths) > 1: - raise FileNotFoundError(f"found multiple paths matching {path}") - else: - return paths[0] - else: - return paths + warn( + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import expand_path' with 'from lgdo.utils import expand_path'." + "'lgdo.lgdo_utils' will be removed in a future release. ", + DeprecationWarning, + stacklevel=2, + ) + return utils.expand_path(path, substitute, list, base_path) diff --git a/src/lgdo/lh5/__init__.py b/src/lgdo/lh5/__init__.py new file mode 100644 index 00000000..6263372a --- /dev/null +++ b/src/lgdo/lh5/__init__.py @@ -0,0 +1,18 @@ +"""Routines from reading and writing LEGEND Data Objects in HDF5 files. +Currently the primary on-disk format for LGDO object is LEGEND HDF5 (LH5) files. IO +is done via the class :class:`.store.LH5Store`. LH5 files can also be +browsed easily in python like any `HDF5 `_ file using +`h5py `_. 
+""" + +from .iterator import LH5Iterator +from .store import LH5Store, load_dfs, load_nda, ls, show + +__all__ = [ + "LH5Iterator", + "LH5Store", + "load_dfs", + "load_nda", + "ls", + "show", +] diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py new file mode 100644 index 00000000..534a7c05 --- /dev/null +++ b/src/lgdo/lh5/iterator.py @@ -0,0 +1,310 @@ +from __future__ import annotations + +import logging +import typing as typing + +import numpy as np +import pandas as pd + +from ..types import Array, Scalar, Struct, VectorOfVectors +from .store import LH5Store +from .utils import expand_path + +LGDO = typing.Union[Array, Scalar, Struct, VectorOfVectors] + + +class LH5Iterator(typing.Iterator): + """ + A class for iterating through one or more LH5 files, one block of entries + at a time. This also accepts an entry list/mask to enable event selection, + and a field mask. + + This class can be used either for random access: + + >>> lh5_obj, n_rows = lh5_it.read(entry) + + to read the block of entries starting at entry. In case of multiple files + or the use of an event selection, entry refers to a global event index + across files and does not count events that are excluded by the selection. + + This can also be used as an iterator: + + >>> for lh5_obj, entry, n_rows in LH5Iterator(...): + >>> # do the thing! + + This is intended for if you are reading a large quantity of data but + want to limit your memory usage (particularly when reading in waveforms!). + The ``lh5_obj`` that is read by this class is reused in order to avoid + reallocation of memory; this means that if you want to hold on to data + between reads, you will have to copy it somewhere! + """ + + def __init__( + self, + lh5_files: str | list[str], + groups: str | list[str], + base_path: str = "", + entry_list: list[int] | list[list[int]] = None, + entry_mask: list[bool] | list[list[bool]] = None, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + buffer_len: int = 3200, + friend: typing.Iterator = None, + ) -> None: + """ + Parameters + ---------- + lh5_files + file or files to read from. May include wildcards and environment + variables. + groups + HDF5 group(s) to read. If a list is provided for both lh5_files + and group, they must be the same size. If a file is wild-carded, + the same group will be assigned to each file found + entry_list + list of entry numbers to read. If a nested list is provided, + expect one top-level list for each file, containing a list of + local entries. If a list of ints is provided, use global entries. + entry_mask + mask of entries to read. If a list of arrays is provided, expect + one for each file. Ignore if a selection list is provided. + field_mask + mask of which fields to read. See :meth:`LH5Store.read` for + more details. + buffer_len + number of entries to read at a time while iterating through files. + friend + a ''friend'' LH5Iterator that will be read in parallel with this. + The friend should have the same length and entry list. A single + LH5 table containing columns from both iterators will be returned. 
+ """ + self.lh5_st = LH5Store(base_path=base_path, keep_open=True) + + # List of files, with wildcards and env vars expanded + if isinstance(lh5_files, str): + lh5_files = [lh5_files] + if isinstance(groups, list): + lh5_files *= len(groups) + elif not isinstance(lh5_files, list): + raise ValueError("lh5_files must be a string or list of strings") + + if isinstance(groups, str): + groups = [groups] * len(lh5_files) + elif not isinstance(groups, list): + raise ValueError("group must be a string or list of strings") + + if not len(groups) == len(lh5_files): + raise ValueError("lh5_files and groups must have same length") + + self.lh5_files = [] + self.groups = [] + for f, g in zip(lh5_files, groups): + f_exp = expand_path(f, list=True, base_path=base_path) + self.lh5_files += f_exp + self.groups += [g] * len(f_exp) + + if entry_list is not None and entry_mask is not None: + raise ValueError( + "entry_list and entry_mask arguments are mutually exclusive" + ) + + # Map to last row in each file + self.file_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") + # Map to last iterator entry for each file + self.entry_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") + self.buffer_len = buffer_len + + if len(self.lh5_files) > 0: + f = self.lh5_files[0] + g = self.groups[0] + self.lh5_buffer = self.lh5_st.get_buffer( + g, + f, + size=self.buffer_len, + field_mask=field_mask, + ) + self.file_map[0] = self.lh5_st.read_n_rows(g, f) + else: + raise RuntimeError(f"can't open any files from {lh5_files}") + + self.n_rows = 0 + self.current_entry = 0 + self.next_entry = 0 + + self.field_mask = field_mask + + # List of entry indices from each file + self.local_entry_list = None + self.global_entry_list = None + if entry_list is not None: + entry_list = list(entry_list) + if isinstance(entry_list[0], int): + self.local_entry_list = [None] * len(self.file_map) + self.global_entry_list = np.array(entry_list, "i") + self.global_entry_list.sort() + + else: + self.local_entry_list = [[]] * len(self.file_map) + for i_file, local_list in enumerate(entry_list): + self.local_entry_list[i_file] = np.array(local_list, "i") + self.local_entry_list[i_file].sort() + + elif entry_mask is not None: + # Convert entry mask into an entry list + if isinstance(entry_mask, pd.Series): + entry_mask = entry_mask.values + if isinstance(entry_mask, np.ndarray): + self.local_entry_list = [None] * len(self.file_map) + self.global_entry_list = np.nonzero(entry_mask)[0] + else: + self.local_entry_list = [[]] * len(self.file_map) + for i_file, local_mask in enumerate(entry_mask): + self.local_entry_list[i_file] = np.nonzero(local_mask)[0] + + # Attach the friend + if friend is not None: + if not isinstance(friend, typing.Iterator): + raise ValueError("Friend must be an Iterator") + self.lh5_buffer.join(friend.lh5_buffer) + self.friend = friend + + def _get_file_cumlen(self, i_file: int) -> int: + """Helper to get cumulative file length of file""" + if i_file < 0: + return 0 + fcl = self.file_map[i_file] + if fcl == np.iinfo("i").max: + fcl = self._get_file_cumlen(i_file - 1) + self.lh5_st.read_n_rows( + self.groups[i_file], self.lh5_files[i_file] + ) + self.file_map[i_file] = fcl + return fcl + + def _get_file_cumentries(self, i_file: int) -> int: + """Helper to get cumulative iterator entries in file""" + if i_file < 0: + return 0 + n = self.entry_map[i_file] + if n == np.iinfo("i").max: + elist = self.get_file_entrylist(i_file) + fcl = self._get_file_cumlen(i_file) + if elist is None: + # no entry list provided + n = fcl 
+ else: + file_entries = self.get_file_entrylist(i_file) + n = len(file_entries) + # check that file entries fall inside of file + if n > 0 and file_entries[-1] >= fcl: + logging.warning(f"Found entries out of range for file {i_file}") + n = np.searchsorted(file_entries, fcl, "right") + n += self._get_file_cumentries(i_file - 1) + self.entry_map[i_file] = n + return n + + def get_file_entrylist(self, i_file: int) -> np.ndarray: + """Helper to get entry list for file""" + # If no entry list is provided + if self.local_entry_list is None: + return None + + elist = self.local_entry_list[i_file] + if elist is None: + # Get local entrylist for this file from global entry list + f_start = self._get_file_cumlen(i_file - 1) + f_end = self._get_file_cumlen(i_file) + i_start = self._get_file_cumentries(i_file - 1) + i_stop = np.searchsorted(self.global_entry_list, f_end, "right") + elist = np.array(self.global_entry_list[i_start:i_stop], "i") - f_start + self.local_entry_list[i_file] = elist + return elist + + def get_global_entrylist(self) -> np.ndarray: + """Get global entry list, constructing it if needed""" + if self.global_entry_list is None and self.local_entry_list is not None: + self.global_entry_list = np.zeros(len(self), "i") + for i_file in range(len(self.lh5_files)): + i_start = self._get_file_cumentries(i_file - 1) + i_stop = self._get_file_cumentries(i_file) + f_start = self._get_file_cumlen(i_file - 1) + self.global_entry_list[i_start:i_stop] = ( + self.get_file_entrylist(i_file) + f_start + ) + return self.global_entry_list + + def read(self, entry: int) -> tuple[LGDO, int]: + """Read the next local chunk of events, starting at entry. Return the + LH5 buffer and number of rows read.""" + self.n_rows = 0 + i_file = np.searchsorted(self.entry_map, entry, "right") + + # if file hasn't been opened yet, search through files + # sequentially until we find the right one + if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("i").max: + while i_file < len(self.lh5_files) and entry >= self._get_file_cumentries( + i_file + ): + i_file += 1 + + if i_file == len(self.lh5_files): + return (self.lh5_buffer, self.n_rows) + local_entry = entry - self._get_file_cumentries(i_file - 1) + + while self.n_rows < self.buffer_len and i_file < len(self.file_map): + # Loop through files + local_idx = self.get_file_entrylist(i_file) + if local_idx is not None and len(local_idx) == 0: + i_file += 1 + local_entry = 0 + continue + + i_local = local_idx[local_entry] if local_idx is not None else local_entry + self.lh5_buffer, n_rows = self.lh5_st.read( + self.groups[i_file], + self.lh5_files[i_file], + start_row=i_local, + n_rows=self.buffer_len - self.n_rows, + idx=local_idx, + field_mask=self.field_mask, + obj_buf=self.lh5_buffer, + obj_buf_start=self.n_rows, + ) + + self.n_rows += n_rows + i_file += 1 + local_entry = 0 + + self.current_entry = entry + + if self.friend is not None: + self.friend.read(entry) + + return (self.lh5_buffer, self.n_rows) + + def reset_field_mask(self, mask): + """Replaces the field mask of this iterator and any friends with mask""" + self.field_mask = mask + if self.friend is not None: + self.friend.reset_field_mask(mask) + + def __len__(self) -> int: + """Return the total number of entries.""" + return ( + self._get_file_cumentries(len(self.lh5_files) - 1) + if len(self.entry_map) > 0 + else 0 + ) + + def __iter__(self) -> typing.Iterator: + """Loop through entries in blocks of size buffer_len.""" + self.current_entry = 0 + self.next_entry = 0 + return self + + def 
__next__(self) -> tuple[LGDO, int, int]: + """Read next buffer_len entries and return lh5_table, iterator entry + and n_rows read.""" + buf, n_rows = self.read(self.next_entry) + self.next_entry = self.current_entry + n_rows + if n_rows == 0: + raise StopIteration + return (buf, self.current_entry, n_rows) diff --git a/src/lgdo/lh5/store.py b/src/lgdo/lh5/store.py new file mode 100644 index 00000000..3c2aa696 --- /dev/null +++ b/src/lgdo/lh5/store.py @@ -0,0 +1,1535 @@ +""" +This module implements routines from reading and writing LEGEND Data Objects in +HDF5 files. +""" +from __future__ import annotations + +import fnmatch +import glob +import logging +import os +import sys +from bisect import bisect_left +from collections import defaultdict +from typing import Any, Union + +import h5py +import numba as nb +import numpy as np +import pandas as pd + +from .. import compression as compress +from ..compression import WaveformCodec +from ..types import ( + Array, + ArrayOfEncodedEqualSizedArrays, + ArrayOfEqualSizedArrays, + FixedSizeArray, + Scalar, + Struct, + Table, + VectorOfEncodedVectors, + VectorOfVectors, + WaveformTable, +) +from .utils import expand_path, parse_datatype + +LGDO = Union[Array, Scalar, Struct, VectorOfVectors] + +log = logging.getLogger(__name__) + +DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"} +DEFAULT_HDF5_COMPRESSION = None + + +class LH5Store: + """ + Class to represent a store of LEGEND HDF5 files. The two main methods + implemented by the class are :meth:`read` and :meth:`write`. + + Examples + -------- + >>> from lgdo import LH5Store + >>> store = LH5Store() + >>> obj, _ = store.read("/geds/waveform", "file.lh5") + >>> type(obj) + lgdo.waveform_table.WaveformTable + """ + + def __init__(self, base_path: str = "", keep_open: bool = False) -> None: + """ + Parameters + ---------- + base_path + directory path to prepend to LH5 files. + keep_open + whether to keep files open by storing the :mod:`h5py` objects as + class attributes. + """ + self.base_path = "" if base_path == "" else expand_path(base_path) + self.keep_open = keep_open + self.files = {} + + def gimme_file(self, lh5_file: str | h5py.File, mode: str = "r") -> h5py.File: + """Returns a :mod:`h5py` file object from the store or creates a new one. + + Parameters + ---------- + lh5_file + LH5 file name. + mode + mode in which to open file. See :class:`h5py.File` documentation. + """ + if isinstance(lh5_file, h5py.File): + return lh5_file + if mode == "r": + lh5_file = expand_path(lh5_file, base_path=self.base_path) + if lh5_file in self.files.keys(): + return self.files[lh5_file] + if self.base_path != "": + full_path = os.path.join(self.base_path, lh5_file) + else: + full_path = lh5_file + if mode != "r": + directory = os.path.dirname(full_path) + if directory != "" and not os.path.exists(directory): + log.debug(f"making path {directory}") + os.makedirs(directory) + if mode == "r" and not os.path.exists(full_path): + raise FileNotFoundError(f"file {full_path} not found") + if mode != "r" and os.path.exists(full_path): + log.debug(f"opening existing file {full_path} in mode '{mode}'") + h5f = h5py.File(full_path, mode) + if self.keep_open: + self.files[lh5_file] = h5f + return h5f + + def gimme_group( + self, + group: str | h5py.Group, + base_group: h5py.Group, + grp_attrs: dict[str, Any] = None, + overwrite: bool = False, + ) -> h5py.Group: + """ + Returns an existing :class:`h5py` group from a base group or creates a + new one. 
Can also set (or replace) group attributes. + + Parameters + ---------- + group + name of the HDF5 group. + base_group + HDF5 group to be used as a base. + grp_attrs + HDF5 group attributes. + overwrite + whether overwrite group attributes, ignored if `grp_attrs` is + ``None``. + """ + if not isinstance(group, h5py.Group): + if group in base_group: + group = base_group[group] + else: + group = base_group.create_group(group) + if grp_attrs is not None: + group.attrs.update(grp_attrs) + return group + if ( + grp_attrs is not None + and len(set(grp_attrs.items()) ^ set(group.attrs.items())) > 0 + ): + if not overwrite: + raise RuntimeError("grp_attrs != group.attrs but overwrite not set") + else: + log.debug(f"overwriting {group}.attrs...") + for key in group.attrs.keys(): + group.attrs.pop(key) + group.attrs.update(grp_attrs) + return group + + def get_buffer( + self, + name: str, + lh5_file: str | h5py.File | list[str | h5py.File], + size: int = None, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + ) -> LGDO: + """Returns an LH5 object appropriate for use as a pre-allocated buffer + in a read loop. Sets size to `size` if object has a size. + """ + obj, n_rows = self.read(name, lh5_file, n_rows=0, field_mask=field_mask) + if hasattr(obj, "resize") and size is not None: + obj.resize(new_size=size) + return obj + + def read( + self, + name: str, + lh5_file: str | h5py.File | list[str | h5py.File], + start_row: int = 0, + n_rows: int = sys.maxsize, + idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None, + use_h5idx: bool = False, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + obj_buf: LGDO = None, + obj_buf_start: int = 0, + decompress: bool = True, + ) -> tuple[LGDO, int]: + """Read LH5 object data from a file. + + Use the ``idx`` parameter to read out particular rows of the data. The ``use_h5idx`` flag + controls whether *only* those rows are read from disk or if the rows are indexed after reading + the entire object. Reading individual rows can be orders of magnitude slower than reading + the whole object and then indexing the desired rows. The default behavior (``use_h5idx=False``) + is to use slightly more memory for a much faster read. See + `legend-pydataobj #29 `_ + for additional information. + + Parameters + ---------- + name + Name of the LH5 object to be read (including its group path). + lh5_file + The file(s) containing the object to be read out. If a list of + files, array-like object data will be concatenated into the output + object. + start_row + Starting entry for the object read (for array-like objects). For a + list of files, only applies to the first file. + n_rows + The maximum number of rows to read (for array-like objects). The + actual number of rows read will be returned as one of the return + values (see below). + idx + For NumPy-style "fancying indexing" for the read to select only some + rows, e.g. after applying some cuts to particular columns. + Only selection along the first axis is supported, so tuple arguments + must be one-tuples. If `n_rows` is not false, `idx` will be truncated to + `n_rows` before reading. To use with a list of files, can pass in a list of + `idx`'s (one for each file) or use a long contiguous list (e.g. built from a previous + identical read). If used in conjunction with `start_row` and `n_rows`, + will be sliced to obey those constraints, where `n_rows` is + interpreted as the (max) number of *selected* values (in `idx`) to be + read out. 
Note that the ``use_h5idx`` parameter controls some behaviour of the + read and that the default behavior (``use_h5idx=False``) prioritizes speed over + a small memory penalty. + use_h5idx + ``True`` will directly pass the ``idx`` parameter to the underlying + ``h5py`` call such that only the selected rows are read directly into memory, + which conserves memory at the cost of speed. There can be a significant penalty + to speed for larger files (1 - 2 orders of magnitude longer time). + ``False`` (default) will read the entire object into memory before + performing the indexing. The default is much faster but requires additional memory, + though a relatively small amount in the typical use case. It is recommended to + leave this parameter as its default. + field_mask + For tables and structs, determines which fields get written out. + Only applies to immediate fields of the requested objects. If a dict + is used, a default dict will be made with the default set to the + opposite of the first element in the dict. This way if one specifies + a few fields at ``False``, all but those fields will be read out, + while if one specifies just a few fields as ``True``, only those + fields will be read out. If a list is provided, the listed fields + will be set to ``True``, while the rest will default to ``False``. + obj_buf + Read directly into memory provided in `obj_buf`. Note: the buffer + will be expanded to accommodate the data requested. To maintain the + buffer length, send in ``n_rows = len(obj_buf)``. + obj_buf_start + Start location in ``obj_buf`` for read. For concatenating data to + array-like objects. + decompress + Decompress data encoded with LGDO's compression routines right + after reading. The option has no effect on data encoded with HDF5 + built-in filters, which is always decompressed upstream by HDF5. + + + Returns + ------- + (object, n_rows_read) + `object` is the read-out object `n_rows_read` is the number of rows + successfully read out. Essential for arrays when the amount of data + is smaller than the object buffer. For scalars and structs + `n_rows_read` will be``1``. For tables it is redundant with + ``table.loc``. + """ + # Handle list-of-files recursively + if not isinstance(lh5_file, (str, h5py.File)): + lh5_file = list(lh5_file) + n_rows_read = 0 + + # to know whether we are reading in a list of files. + # this is part of the fix for reading data by idx + # (see https://github.com/legend-exp/legend-pydataobj/issues/29) + # so that we only make a copy of the data if absolutely necessary + # or if we can read the data from file without having to make a copy + self.in_file_loop = True + + for i, h5f in enumerate(lh5_file): + if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]): + # a list of lists: must be one per file + idx_i = idx[i] + elif idx is not None: + # make idx a proper tuple if it's not one already + if not (isinstance(idx, tuple) and len(idx) == 1): + idx = (idx,) + # idx is a long continuous array + n_rows_i = self.read_n_rows(name, h5f) + # find the length of the subset of idx that contains indices + # that are less than n_rows_i + n_rows_to_read_i = bisect_left(idx[0], n_rows_i) + # now split idx into idx_i and the remainder + idx_i = (idx[0][:n_rows_to_read_i],) + idx = (idx[0][n_rows_to_read_i:] - n_rows_i,) + else: + idx_i = None + n_rows_i = n_rows - n_rows_read + + # maybe someone passed in a list of len==1? 
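Since the `field_mask` and `idx`/`use_h5idx` semantics described above are easy to get backwards, a short hedged illustration (group and column names are taken from the notebooks in this patch, the file path is a placeholder):

```python
from lgdo.lh5 import LH5Store

store = LH5Store()

# list/tuple field_mask: read *only* the listed columns
obj, n = store.read("geds/raw", "file.lh5", field_mask=("timestamp", "energy"))

# dict field_mask with a False entry: read everything *except* that column
obj, n = store.read("geds/raw", "file.lh5", field_mask={"waveform": False})

# idx selects rows; use_h5idx=True reads only those rows from disk (less
# memory, slower), while the default reads the block and then indexes it
obj, n = store.read("geds/raw", "file.lh5", idx=[1, 3, 7, 9], use_h5idx=True)
```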
+ if i == (len(lh5_file) - 1): + self.in_file_loop = False + + obj_buf, n_rows_read_i = self.read( + name, + lh5_file[i], + start_row=start_row, + n_rows=n_rows_i, + idx=idx_i, + use_h5idx=use_h5idx, + field_mask=field_mask, + obj_buf=obj_buf, + obj_buf_start=obj_buf_start, + decompress=decompress, + ) + + n_rows_read += n_rows_read_i + if n_rows_read >= n_rows or obj_buf is None: + return obj_buf, n_rows_read + start_row = 0 + obj_buf_start += n_rows_read_i + + self.in_file_loop = False + + return obj_buf, n_rows_read + + # get the file from the store + h5f = self.gimme_file(lh5_file, "r") + if not h5f or name not in h5f: + raise KeyError(f"'{name}' not in {h5f.filename}") + + log.debug( + f"reading {h5f.filename}:{name}[{start_row}:{n_rows}], decompress = {decompress}, " + + (f" with field mask {field_mask}" if field_mask else "") + ) + + # make idx a proper tuple if it's not one already + if not (isinstance(idx, tuple) and len(idx) == 1): + if idx is not None: + idx = (idx,) + + # get the object's datatype + if "datatype" not in h5f[name].attrs: + raise RuntimeError( + f"'{name}' in file {lh5_file} is missing the datatype attribute" + ) + + datatype = h5f[name].attrs["datatype"] + datatype, shape, elements = parse_datatype(datatype) + + # check field_mask and make it a default dict + if datatype == "struct" or datatype == "table": + if field_mask is None: + field_mask = defaultdict(lambda: True) + elif isinstance(field_mask, dict): + default = True + if len(field_mask) > 0: + default = not field_mask[list(field_mask.keys())[0]] + field_mask = defaultdict(lambda: default, field_mask) + elif isinstance(field_mask, (list, tuple)): + field_mask = defaultdict( + lambda: False, {field: True for field in field_mask} + ) + elif not isinstance(field_mask, defaultdict): + raise RuntimeError("bad field_mask of type", type(field_mask).__name__) + elif field_mask is not None: + raise RuntimeError(f"datatype {datatype} does not accept a field_mask") + + # Scalar + # scalars are dim-0 datasets + if datatype == "scalar": + value = h5f[name][()] + if elements == "bool": + value = np.bool_(value) + if obj_buf is not None: + obj_buf.value = value + obj_buf.attrs.update(h5f[name].attrs) + return obj_buf, 1 + else: + return Scalar(value=value, attrs=h5f[name].attrs), 1 + + # Struct + # recursively build a struct, return as a dictionary + if datatype == "struct": + # ignore obj_buf. + # TODO: could append new fields or overwrite/concat to existing + # fields. If implemented, get_buffer() above should probably also + # (optionally?) prep buffers for each field + if obj_buf is not None: + raise NotImplementedError("obj_buf not implemented for LGOD Structs") + + # loop over fields and read + obj_dict = {} + for field in elements: + if not field_mask[field]: + continue + # TODO: it's strange to pass start_row, n_rows, idx to struct + # fields. If they all had shared indexing, they should be in a + # table... Maybe should emit a warning? Or allow them to be + # dicts keyed by field name? 
+ if "int_keys" in h5f[name].attrs: + if dict(h5f[name].attrs)["int_keys"]: + f = int(field) + else: + f = str(field) + obj_dict[f], _ = self.read( + name + "/" + field, + h5f, + start_row=start_row, + n_rows=n_rows, + idx=idx, + use_h5idx=use_h5idx, + decompress=decompress, + ) + # modify datatype in attrs if a field_mask was used + attrs = dict(h5f[name].attrs) + if field_mask is not None: + selected_fields = [] + for field in elements: + if field_mask[field]: + selected_fields.append(field) + attrs["datatype"] = "struct" + "{" + ",".join(selected_fields) + "}" + return Struct(obj_dict=obj_dict, attrs=attrs), 1 + + # Below here is all array-like types. So trim idx if needed + if idx is not None: + # chop off indices < start_row + i_first_valid = bisect_left(idx[0], start_row) + idxa = idx[0][i_first_valid:] + # don't readout more than n_rows indices + idx = (idxa[:n_rows],) # works even if n_rows > len(idxa) + + # Table or WaveformTable + if datatype == "table": + col_dict = {} + + # read out each of the fields + rows_read = [] + for field in elements: + if not field_mask[field]: + continue + + fld_buf = None + if obj_buf is not None: + if not isinstance(obj_buf, Table) or field not in obj_buf: + raise ValueError( + f"obj_buf for LGDO Table '{name}' not formatted correctly" + ) + + else: + fld_buf = obj_buf[field] + + col_dict[field], n_rows_read = self.read( + name + "/" + field, + h5f, + start_row=start_row, + n_rows=n_rows, + idx=idx, + use_h5idx=use_h5idx, + obj_buf=fld_buf, + obj_buf_start=obj_buf_start, + decompress=decompress, + ) + if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf): + obj_buf.resize(obj_buf_start + n_rows_read) + + rows_read.append(n_rows_read) + + # warn if all columns don't read in the same number of rows + if len(rows_read) > 0: + n_rows_read = rows_read[0] + else: + n_rows_read = 0 + log.warning(f"Table '{name}' has no subgroups accepted by field mask") + + for n in rows_read[1:]: + if n != n_rows_read: + log.warning( + f"Table '{name}' got strange n_rows_read = {n}, {n_rows_read} was expected ({rows_read})" + ) + + # modify datatype in attrs if a field_mask was used + attrs = dict(h5f[name].attrs) + if field_mask is not None: + selected_fields = [] + for field in elements: + if field_mask[field]: + selected_fields.append(field) + attrs["datatype"] = "table" + "{" + ",".join(selected_fields) + "}" + + # fields have been read out, now return a table + if obj_buf is None: + # if col_dict contains just 3 objects called t0, dt, and values, + # return a WaveformTable + if ( + len(col_dict) == 3 + and "t0" in col_dict + and "dt" in col_dict + and "values" in col_dict + ): + table = WaveformTable( + t0=col_dict["t0"], dt=col_dict["dt"], values=col_dict["values"] + ) + else: + table = Table(col_dict=col_dict, attrs=attrs) + + # set (write) loc to end of tree + table.loc = n_rows_read + return table, n_rows_read + else: + # We have read all fields into the object buffer. Run + # checks: All columns should be the same size. So update + # table's size as necessary, warn if any mismatches are found + obj_buf.resize(do_warn=True) + # set (write) loc to end of tree + obj_buf.loc = obj_buf_start + n_rows_read + # check attributes + if set(obj_buf.attrs.keys()) != set(attrs.keys()): + raise RuntimeError( + f"attrs mismatch. 
obj_buf.attrs: " + f"{obj_buf.attrs}, h5f[{name}].attrs: {attrs}" + ) + return obj_buf, n_rows_read + + # ArrayOfEncodedEqualSizedArrays and VectorOfEncodedVectors + for cond, enc_lgdo in [ + ( + datatype == "array_of_encoded_equalsized_arrays", + ArrayOfEncodedEqualSizedArrays, + ), + (elements.startswith("encoded_array"), VectorOfEncodedVectors), + ]: + if cond: + if ( + not decompress + and obj_buf is not None + and not isinstance(obj_buf, enc_lgdo) + ): + raise ValueError(f"obj_buf for '{name}' not a {enc_lgdo}") + + # read out decoded_size, either a Scalar or an Array + decoded_size_buf = encoded_data_buf = None + if obj_buf is not None and not decompress: + decoded_size_buf = obj_buf.decoded_size + encoded_data_buf = obj_buf.encoded_data + + decoded_size, _ = self.read( + f"{name}/decoded_size", + h5f, + start_row=start_row, + n_rows=n_rows, + idx=idx, + use_h5idx=use_h5idx, + obj_buf=None if decompress else decoded_size_buf, + obj_buf_start=0 if decompress else obj_buf_start, + ) + + # read out encoded_data, a VectorOfVectors + encoded_data, n_rows_read = self.read( + f"{name}/encoded_data", + h5f, + start_row=start_row, + n_rows=n_rows, + idx=idx, + use_h5idx=use_h5idx, + obj_buf=None if decompress else encoded_data_buf, + obj_buf_start=0 if decompress else obj_buf_start, + ) + + # return the still encoded data in the buffer object, if there + if obj_buf is not None and not decompress: + return obj_buf, n_rows_read + + # otherwise re-create the encoded LGDO + rawdata = enc_lgdo( + encoded_data=encoded_data, + decoded_size=decoded_size, + attrs=h5f[name].attrs, + ) + + # already return if no decompression is requested + if not decompress: + return rawdata, n_rows_read + + # if no buffer, decode and return + elif obj_buf is None and decompress: + return compress.decode(rawdata), n_rows_read + + # eventually expand provided obj_buf, if too short + buf_size = obj_buf_start + n_rows_read + if len(obj_buf) < buf_size: + obj_buf.resize(buf_size) + + # use the (decoded object type) buffer otherwise + if enc_lgdo == ArrayOfEncodedEqualSizedArrays: + if not isinstance(obj_buf, ArrayOfEqualSizedArrays): + raise ValueError( + f"obj_buf for decoded '{name}' not an ArrayOfEqualSizedArrays" + ) + + compress.decode(rawdata, obj_buf[obj_buf_start:buf_size]) + + elif enc_lgdo == VectorOfEncodedVectors: + if not isinstance(obj_buf, VectorOfVectors): + raise ValueError( + f"obj_buf for decoded '{name}' not a VectorOfVectors" + ) + + # FIXME: not a good idea. 
an in place decoding version + # of decode would be needed to avoid extra memory + # allocations + for i, wf in enumerate(compress.decode(rawdata)): + obj_buf[obj_buf_start + i] = wf + + return obj_buf, n_rows_read + + # VectorOfVectors + # read out vector of vectors of different size + if elements.startswith("array"): + if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors): + raise ValueError(f"obj_buf for '{name}' not a LGDO VectorOfVectors") + + # read out cumulative_length + cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length + cumulative_length, n_rows_read = self.read( + f"{name}/cumulative_length", + h5f, + start_row=start_row, + n_rows=n_rows, + idx=idx, + use_h5idx=use_h5idx, + obj_buf=cumulen_buf, + obj_buf_start=obj_buf_start, + ) + # get a view of just what was read out for cleaner code below + this_cumulen_nda = cumulative_length.nda[ + obj_buf_start : obj_buf_start + n_rows_read + ] + + if idx is not None and n_rows_read > 0: + # get the starting indices for each array in flattended data: + # the starting index for array[i] is cumulative_length[i-1] + idx2 = (np.asarray(idx[0]).copy() - 1,) + # re-read cumulative_length with these indices + # note this will allocate memory for fd_starts! + fd_start = None + if idx2[0][0] == -1: + idx2 = (idx2[0][1:],) + fd_start = 0 # this variable avoids an ndarray append + fd_starts, fds_n_rows_read = self.read( + f"{name}/cumulative_length", + h5f, + start_row=start_row, + n_rows=n_rows, + idx=idx2, + use_h5idx=use_h5idx, + ) + fd_starts = fd_starts.nda # we just need the nda + if fd_start is None: + fd_start = fd_starts[0] + + # compute the length that flattened_data will have after the + # fancy-indexed read + fd_n_rows = np.sum(this_cumulen_nda[-len(fd_starts) :] - fd_starts) + if fd_start == 0: + fd_n_rows += this_cumulen_nda[0] + + # now make fd_idx + fd_idx = np.empty(fd_n_rows, dtype="uint32") + fd_idx = _make_fd_idx(fd_starts, this_cumulen_nda, fd_idx) + + # Now clean up this_cumulen_nda, to be ready + # to match the in-memory version of flattened_data. Note: these + # operations on the view change the original array because they are + # numpy arrays, not lists. + this_cumulen_nda[-len(fd_starts) :] -= fd_starts + np.cumsum(this_cumulen_nda, out=this_cumulen_nda) + + else: + fd_idx = None + + # determine the start_row and n_rows for the flattened_data readout + fd_start = 0 + if start_row > 0 and n_rows_read > 0: + # need to read out the cumulen sample -before- the first sample + # read above in order to get the starting row of the first + # vector to read out in flattened_data + fd_start = h5f[f"{name}/cumulative_length"][start_row - 1] + + # check limits for values that will be used subsequently + if this_cumulen_nda[-1] < fd_start: + log.debug( + f"this_cumulen_nda[-1] = {this_cumulen_nda[-1]}, " + f"fd_start = {fd_start}, " + f"start_row = {start_row}, " + f"n_rows_read = {n_rows_read}" + ) + raise RuntimeError( + f"cumulative_length non-increasing between entries " + f"{start_row} and {start_row+n_rows_read} ??" + ) + + # determine the number of rows for the flattened_data readout + fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0 + + # Now done with this_cumulen_nda, so we can clean it up to be ready + # to match the in-memory version of flattened_data. Note: these + # operations on the view change the original array because they are + # numpy arrays, not lists. 
+ # + # First we need to subtract off the in-file offset for the start of + # read for flattened_data + this_cumulen_nda -= fd_start + + # If we started with a partially-filled buffer, add the + # appropriate offset for the start of the in-memory flattened + # data for this read. + fd_buf_start = np.uint32(0) + if obj_buf_start > 0: + fd_buf_start = cumulative_length.nda[obj_buf_start - 1] + this_cumulen_nda += fd_buf_start + + # Now prepare the object buffer if necessary + fd_buf = None + if obj_buf is not None: + fd_buf = obj_buf.flattened_data + # grow fd_buf if necessary to hold the data + fdb_size = fd_buf_start + fd_n_rows + if len(fd_buf) < fdb_size: + fd_buf.resize(fdb_size) + + # now read + flattened_data, dummy_rows_read = self.read( + f"{name}/flattened_data", + h5f, + start_row=fd_start, + n_rows=fd_n_rows, + idx=fd_idx, + use_h5idx=use_h5idx, + obj_buf=fd_buf, + obj_buf_start=fd_buf_start, + ) + if obj_buf is not None: + return obj_buf, n_rows_read + return ( + VectorOfVectors( + flattened_data=flattened_data, + cumulative_length=cumulative_length, + attrs=h5f[name].attrs, + ), + n_rows_read, + ) + + # Array + # FixedSizeArray + # ArrayOfEqualSizedArrays + # read out all arrays by slicing + if "array" in datatype: + if obj_buf is not None: + if not isinstance(obj_buf, Array): + raise ValueError(f"obj_buf for '{name}' not an LGDO Array") + obj_buf = None + + # compute the number of rows to read + # we culled idx above for start_row and n_rows, now we have to apply + # the constraint of the length of the dataset + ds_n_rows = h5f[name].shape[0] + if idx is not None: + if len(idx[0]) > 0 and idx[0][-1] >= ds_n_rows: + log.warning( + "idx indexed past the end of the array in the file. Culling..." + ) + n_rows_to_read = bisect_left(idx[0], ds_n_rows) + idx = (idx[0][:n_rows_to_read],) + if len(idx[0]) == 0: + log.warning("idx empty after culling.") + n_rows_to_read = len(idx[0]) + else: + n_rows_to_read = ds_n_rows - start_row + if n_rows_to_read > n_rows: + n_rows_to_read = n_rows + + # if idx is passed, check if we can make it a slice instead (faster) + change_idx_to_slice = False + + # prepare the selection for the read. Use idx if available + if idx is not None: + # check if idx is empty and convert to slice instead + if len(idx[0]) == 0: + source_sel = np.s_[0:0] + change_idx_to_slice = True + # check if idx is contiguous and increasing + # if so, convert it to a slice instead (faster) + elif np.all(np.diff(idx[0]) == 1): + source_sel = np.s_[idx[0][0] : idx[0][-1] + 1] + change_idx_to_slice = True + else: + source_sel = idx + else: + source_sel = np.s_[start_row : start_row + n_rows_to_read] + + # Now read the array + if obj_buf is not None and n_rows_to_read > 0: + buf_size = obj_buf_start + n_rows_to_read + if len(obj_buf) < buf_size: + obj_buf.resize(buf_size) + dest_sel = np.s_[obj_buf_start:buf_size] + + # this is required to make the read of multiple files faster + # until a better solution found. 
+ if change_idx_to_slice or idx is None or use_h5idx: + h5f[name].read_direct(obj_buf.nda, source_sel, dest_sel) + else: + # it is faster to read the whole object and then do fancy indexing + obj_buf.nda[dest_sel] = h5f[name][...][source_sel] + + nda = obj_buf.nda + else: + if n_rows == 0: + tmp_shape = (0,) + h5f[name].shape[1:] + nda = np.empty(tmp_shape, h5f[name].dtype) + else: + if change_idx_to_slice or idx is None or use_h5idx: + nda = h5f[name][source_sel] + else: + # it is faster to read the whole object and then do fancy indexing + nda = h5f[name][...][source_sel] + + # if reading a list of files recursively, this is given to obj_buf on + # the first file read. obj_buf needs to be resized and therefore + # it needs to hold the data itself (not a view of the data). + # a view is returned by the source_sel indexing, which cannot be resized + # by ndarray.resize(). + if hasattr(self, "in_file_loop") and self.in_file_loop: + nda = np.copy(nda) + + # special handling for bools + # (C and Julia store as uint8 so cast to bool) + if elements == "bool": + nda = nda.astype(np.bool_) + + # Finally, set attributes and return objects + attrs = h5f[name].attrs + if obj_buf is None: + if datatype == "array": + return Array(nda=nda, attrs=attrs), n_rows_to_read + if datatype == "fixedsize_array": + return FixedSizeArray(nda=nda, attrs=attrs), n_rows_to_read + if datatype == "array_of_equalsized_arrays": + return ( + ArrayOfEqualSizedArrays(nda=nda, dims=shape, attrs=attrs), + n_rows_to_read, + ) + else: + if set(obj_buf.attrs.keys()) != set(attrs.keys()): + raise RuntimeError( + f"attrs mismatch. " + f"obj_buf.attrs: {obj_buf.attrs}, " + f"h5f[{name}].attrs: {attrs}" + ) + return obj_buf, n_rows_to_read + + raise RuntimeError(f"don't know how to read datatype {datatype}") + + def write( + self, + obj: LGDO, + name: str, + lh5_file: str | h5py.File, + group: str | h5py.Group = "/", + start_row: int = 0, + n_rows: int = None, + wo_mode: str = "append", + write_start: int = 0, + **h5py_kwargs, + ) -> None: + """Write an LGDO into an LH5 file. + + If the `obj` :class:`.LGDO` has a `compression` attribute, its value is + interpreted as the algorithm to be used to compress `obj` before + writing to disk. The type of `compression` can be: + + string, kwargs dictionary, hdf5plugin filter + interpreted as the name of a built-in or custom `HDF5 compression + filter `_ + (``"gzip"``, ``"lzf"``, :mod:`hdf5plugin` filter object etc.) and + passed directly to :meth:`h5py.Group.create_dataset`. + + :class:`.WaveformCodec` object + If `obj` is a :class:`.WaveformTable` and ``obj.values`` holds the + attribute, compress ``values`` using this algorithm. More + documentation about the supported waveform compression algorithms at + :mod:`.lgdo.compression`. + + If the `obj` :class:`.LGDO` has a `hdf5_settings` attribute holding a + dictionary, it is interpreted as a list of keyword arguments to be + forwarded directly to :meth:`h5py.Group.create_dataset` (exactly like + the first format of `compression` above). This is the preferred way to + specify HDF5 dataset options such as chunking etc. If compression + options are specified, they take precedence over those set with the + `compression` attribute. + + Note + ---- + The `compression` LGDO attribute takes precedence over the default HDF5 + compression settings. The `hdf5_settings` attribute takes precedence + over `compression`. These attributes are not written to disk. 
+ + Note + ---- + HDF5 compression is skipped for the `encoded_data.flattened_data` + dataset of :class:`.VectorOfEncodedVectors` and + :class:`.ArrayOfEncodedEqualSizedArrays`. + + Parameters + ---------- + obj + LH5 object. if object is array-like, writes `n_rows` starting from + `start_row` in `obj`. + name + name of the object in the output HDF5 file. + lh5_file + HDF5 file name or :class:`h5py.File` object. + group + HDF5 group name or :class:`h5py.Group` object in which `obj` should + be written. + start_row + first row in `obj` to be written. + n_rows + number of rows in `obj` to be written. + wo_mode + - ``write_safe`` or ``w``: only proceed with writing if the + object does not already exist in the file. + - ``append`` or ``a``: append along axis 0 (the first dimension) + of array-like objects and array-like subfields of structs. + :class:`~.lgdo.scalar.Scalar` objects get overwritten. + - ``overwrite`` or ``o``: replace data in the file if present, + starting from `write_start`. Note: overwriting with `write_start` = + end of array is the same as ``append``. + - ``overwrite_file`` or ``of``: delete file if present prior to + writing to it. `write_start` should be 0 (its ignored). + - ``append_column`` or ``ac``: append columns from an :class:`~.lgdo.table.Table` + `obj` only if there is an existing :class:`~.lgdo.table.Table` in the `lh5_file` with + the same `name` and :class:`~.lgdo.table.Table.size`. If the sizes don't match, + or if there are matching fields, it errors out. + write_start + row in the output file (if already existing) to start overwriting + from. + **h5py_kwargs + additional keyword arguments forwarded to + :meth:`h5py.Group.create_dataset` to specify, for example, an HDF5 + compression filter to be applied before writing non-scalar + datasets. **Note: `compression` Ignored if compression is specified + as an `obj` attribute.** + """ + log.debug( + f"writing {repr(obj)}[{start_row}:{n_rows}] as " + f"{lh5_file}:{group}/{name}[{write_start}:], " + f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}" + ) + + if wo_mode == "write_safe": + wo_mode = "w" + if wo_mode == "append": + wo_mode = "a" + if wo_mode == "overwrite": + wo_mode = "o" + if wo_mode == "overwrite_file": + wo_mode = "of" + write_start = 0 + if wo_mode == "append_column": + wo_mode = "ac" + if wo_mode not in ["w", "a", "o", "of", "ac"]: + raise ValueError(f"unknown wo_mode '{wo_mode}'") + + # "mode" is for the h5df.File and wo_mode is for this function + # In hdf5, 'a' is really "modify" -- in addition to appending, you can + # change any object in the file. So we use file:append for + # write_object:overwrite. + mode = "w" if wo_mode == "of" else "a" + lh5_file = self.gimme_file(lh5_file, mode=mode) + group = self.gimme_group(group, lh5_file) + if wo_mode == "w" and name in group: + raise RuntimeError(f"can't overwrite '{name}' in wo_mode 'write_safe'") + + # struct or table or waveform table + if isinstance(obj, Struct): + # In order to append a column, we need to update the `table{old_fields}` value in `group.attrs['datatype"]` to include the new fields. + # One way to do this is to override `obj.attrs["datatype"]` to include old and new fields. Then we can write the fields to the table as normal. 
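+            # Illustration of the rewrite (field names are made up): appending a
+            # column "baseline" to an on-disk "table{energy,timestamp}" turns the
+            # datatype attribute into "table{energy,timestamp,baseline}".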
+ if wo_mode == "ac": + old_group = self.gimme_group(name, group) + datatype, shape, fields = parse_datatype(old_group.attrs["datatype"]) + if datatype not in ["table", "struct"]: + raise RuntimeError( + f"Trying to append columns to an object of type {datatype}" + ) + + # If the mode is `append_column`, make sure we aren't appending a table that has a column of the same name as in the existing table + # Also make sure that the field we are adding has the same size + if len(list(set(fields).intersection(set(obj.keys())))) != 0: + raise ValueError( + f"Can't append {list(set(fields).intersection(set(obj.keys())))} column(s) to a table with the same field(s)" + ) + # It doesn't matter what key we access, as all fields in the old table have the same size + if old_group[list(old_group.keys())[0]].size != obj.size: + raise ValueError( + f"Table sizes don't match. Trying to append column of size {obj.size} to a table of size {old_group[list(old_group.keys())[0]].size}." + ) + + # Now we can append the obj.keys() to the old fields, and then update obj.attrs. + fields.extend(list(obj.keys())) + obj.attrs.pop("datatype") + obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}" + + group = self.gimme_group( + name, + group, + grp_attrs=obj.attrs, + overwrite=(wo_mode in ["o", "ac"]), + ) + # If the mode is overwrite, then we need to peek into the file's table's existing fields + # If we are writing a new table to the group that does not contain an old field, we should delete that old field from the file + if wo_mode == "o": + # Find the old keys in the group that are not present in the new table's keys, then delete them + for key in list(set(group.keys()) - set(obj.keys())): + log.debug(f"{key} is not present in new table, deleting field") + del group[key] + + for field in obj.keys(): + # eventually compress waveform table values with LGDO's + # custom codecs before writing + # if waveformtable.values.attrs["compression"] is NOT a + # WaveformCodec, just leave it there + obj_fld = None + if ( + isinstance(obj, WaveformTable) + and field == "values" + and not isinstance(obj.values, VectorOfEncodedVectors) + and not isinstance(obj.values, ArrayOfEncodedEqualSizedArrays) + and "compression" in obj.values.attrs + and isinstance(obj.values.attrs["compression"], WaveformCodec) + ): + codec = obj.values.attrs["compression"] + obj_fld = compress.encode(obj.values, codec=codec) + else: + obj_fld = obj[field] + + # Convert keys to string for dataset names + f = str(field) + self.write( + obj_fld, + f, + lh5_file, + group=group, + start_row=start_row, + n_rows=n_rows, + wo_mode=wo_mode, + write_start=write_start, + **h5py_kwargs, + ) + return + + # scalars + elif isinstance(obj, Scalar): + if name in group: + if wo_mode in ["o", "a"]: + log.debug(f"overwriting {name} in {group}") + del group[name] + else: + raise RuntimeError( + f"tried to overwrite {name} in {group} for wo_mode {wo_mode}" + ) + ds = group.create_dataset(name, shape=(), data=obj.value) + ds.attrs.update(obj.attrs) + return + + # vector of encoded vectors + elif isinstance(obj, (VectorOfEncodedVectors, ArrayOfEncodedEqualSizedArrays)): + group = self.gimme_group( + name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o") + ) + + # ask not to further compress flattened_data, it is already compressed! 
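+            # (the waveform codec has already shrunk these bytes, so stacking an
+            # HDF5 filter on top would mostly just cost CPU time)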
+ obj.encoded_data.flattened_data.attrs["compression"] = None + + self.write( + obj.encoded_data, + "encoded_data", + lh5_file, + group=group, + start_row=start_row, + n_rows=n_rows, + wo_mode=wo_mode, + write_start=write_start, + **h5py_kwargs, + ) + + self.write( + obj.decoded_size, + "decoded_size", + lh5_file, + group=group, + start_row=start_row, + n_rows=n_rows, + wo_mode=wo_mode, + write_start=write_start, + **h5py_kwargs, + ) + + # vector of vectors + elif isinstance(obj, VectorOfVectors): + group = self.gimme_group( + name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o") + ) + if ( + n_rows is None + or n_rows > obj.cumulative_length.nda.shape[0] - start_row + ): + n_rows = obj.cumulative_length.nda.shape[0] - start_row + + # if appending we need to add an appropriate offset to the + # cumulative lengths as appropriate for the in-file object + offset = 0 # declare here because we have to subtract it off at the end + if (wo_mode == "a" or wo_mode == "o") and "cumulative_length" in group: + len_cl = len(group["cumulative_length"]) + if wo_mode == "a": + write_start = len_cl + if len_cl > 0: + offset = group["cumulative_length"][write_start - 1] + + # First write flattened_data array. Only write rows with data. + fd_start = 0 if start_row == 0 else obj.cumulative_length.nda[start_row - 1] + fd_n_rows = obj.cumulative_length.nda[start_row + n_rows - 1] - fd_start + self.write( + obj.flattened_data, + "flattened_data", + lh5_file, + group=group, + start_row=fd_start, + n_rows=fd_n_rows, + wo_mode=wo_mode, + write_start=offset, + **h5py_kwargs, + ) + + # now offset is used to give appropriate in-file values for + # cumulative_length. Need to adjust it for start_row + if start_row > 0: + offset -= obj.cumulative_length.nda[start_row - 1] + + # Add offset to obj.cumulative_length itself to avoid memory allocation. + # Then subtract it off after writing! (otherwise it will be changed + # upon return) + cl_dtype = obj.cumulative_length.nda.dtype.type + obj.cumulative_length.nda += cl_dtype(offset) + + self.write( + obj.cumulative_length, + "cumulative_length", + lh5_file, + group=group, + start_row=start_row, + n_rows=n_rows, + wo_mode=wo_mode, + write_start=write_start, + **h5py_kwargs, + ) + obj.cumulative_length.nda -= cl_dtype(offset) + + return + + # if we get this far, must be one of the Array types + elif isinstance(obj, Array): + if n_rows is None or n_rows > obj.nda.shape[0] - start_row: + n_rows = obj.nda.shape[0] - start_row + + nda = obj.nda[start_row : start_row + n_rows] + + # hack to store bools as uint8 for c / Julia compliance + if nda.dtype.name == "bool": + nda = nda.astype(np.uint8) + + # need to create dataset from ndarray the first time for speed + # creating an empty dataset and appending to that is super slow! + if (wo_mode != "a" and write_start == 0) or name not in group: + # this is needed in order to have a resizable (in the first + # axis) data set, i.e. rows can be appended later + # NOTE: this automatically turns chunking on! 
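+                # (HDF5 only allows resizing along axes declared resizable at
+                # creation time, hence maxshape=(None, ...) for axis 0; h5py then
+                # stores the dataset chunked)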
+ maxshape = (None,) + nda.shape[1:] + h5py_kwargs.setdefault("maxshape", maxshape) + + if wo_mode == "o" and name in group: + log.debug(f"overwriting {name} in {group}") + del group[name] + + # set default compression options + for k, v in DEFAULT_HDF5_SETTINGS.items(): + h5py_kwargs.setdefault(k, v) + + # compress using the 'compression' LGDO attribute, if available + if "compression" in obj.attrs: + comp_algo = obj.attrs["compression"] + if isinstance(comp_algo, dict): + h5py_kwargs |= obj.attrs["compression"] + else: + h5py_kwargs["compression"] = obj.attrs["compression"] + + # and even the 'hdf5_settings' one, preferred + if "hdf5_settings" in obj.attrs: + h5py_kwargs |= obj.attrs["hdf5_settings"] + + # create HDF5 dataset + ds = group.create_dataset(name, data=nda, **h5py_kwargs) + + # attach HDF5 dataset attributes, but not "compression"! + _attrs = obj.getattrs(datatype=True) + _attrs.pop("compression", None) + _attrs.pop("hdf5_settings", None) + ds.attrs.update(_attrs) + return + + # Now append or overwrite + ds = group[name] + if not isinstance(ds, h5py.Dataset): + raise RuntimeError( + f"existing HDF5 object '{name}' in group '{group}'" + " is not a dataset! Cannot overwrite or append" + ) + + old_len = ds.shape[0] + if wo_mode == "a": + write_start = old_len + add_len = write_start + nda.shape[0] - old_len + ds.resize(old_len + add_len, axis=0) + ds[write_start:] = nda + return + + else: + raise RuntimeError( + f"do not know how to write '{name}' of type '{type(obj).__name__}'" + ) + + def read_n_rows(self, name: str, lh5_file: str | h5py.File) -> int | None: + """Look up the number of rows in an Array-like object called `name` in + `lh5_file`. + + Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.""" + # this is basically a stripped down version of read_object + h5f = self.gimme_file(lh5_file, "r") + if not h5f or name not in h5f: + raise KeyError(f"'{name}' not in {lh5_file}") + + # get the datatype + if "datatype" not in h5f[name].attrs: + raise RuntimeError( + f"'{name}' in file {lh5_file} is missing the datatype attribute" + ) + + datatype = h5f[name].attrs["datatype"] + datatype, shape, elements = parse_datatype(datatype) + + # scalars are dim-0 datasets + if datatype == "scalar": + return None + + # structs don't have rows + if datatype == "struct": + return None + + # tables should have elements with all the same length + if datatype == "table": + # read out each of the fields + rows_read = None + for field in elements: + n_rows_read = self.read_n_rows(name + "/" + field, h5f) + if not rows_read: + rows_read = n_rows_read + elif rows_read != n_rows_read: + log.warning( + f"'{field}' field in table '{name}' has {rows_read} rows, " + f"{n_rows_read} was expected" + ) + return rows_read + + # length of vector of vectors is the length of its cumulative_length + if elements.startswith("array"): + return self.read_n_rows(f"{name}/cumulative_length", h5f) + + # length of vector of encoded vectors is the length of its decoded_size + if ( + elements.startswith("encoded_array") + or datatype == "array_of_encoded_equalsized_arrays" + ): + return self.read_n_rows(f"{name}/encoded_data", h5f) + + # return array length (without reading the array!) 
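+        # (axis 0 is the row axis for all array-like LGDOs, so shape[0] is
+        # sufficient even for ArrayOfEqualSizedArrays)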
+ if "array" in datatype: + # compute the number of rows to read + return h5f[name].shape[0] + + raise RuntimeError(f"don't know how to read datatype '{datatype}'") + + +def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: + """Return a list of LH5 groups in the input file and group, similar + to ``ls`` or ``h5ls``. Supports wildcards in group names. + + + Parameters + ---------- + lh5_file + name of file. + lh5_group + group to search. add a ``/`` to the end of the group name if you want to + list all objects inside that group. + """ + + log.debug( + f"Listing objects in '{lh5_file}'" + + ("" if lh5_group == "" else f" (and group {lh5_group})") + ) + + lh5_st = LH5Store() + # To use recursively, make lh5_file a h5group instead of a string + if isinstance(lh5_file, str): + lh5_file = lh5_st.gimme_file(lh5_file, "r") + if lh5_group.startswith("/"): + lh5_group = lh5_group[1:] + + if lh5_group == "": + lh5_group = "*" + + splitpath = lh5_group.split("/", 1) + matchingkeys = fnmatch.filter(lh5_file.keys(), splitpath[0]) + + if len(splitpath) == 1: + return matchingkeys + else: + ret = [] + for key in matchingkeys: + ret.extend([f"{key}/{path}" for path in ls(lh5_file[key], splitpath[1])]) + return ret + + +def show( + lh5_file: str | h5py.Group, + lh5_group: str = "/", + attrs: bool = False, + indent: str = "", + header: bool = True, +) -> None: + """Print a tree of LH5 file contents with LGDO datatype. + + Parameters + ---------- + lh5_file + the LH5 file. + lh5_group + print only contents of this HDF5 group. + attrs + print the HDF5 attributes too. + indent + indent the diagram with this string. + header + print `lh5_group` at the top of the diagram. + + Examples + -------- + >>> from lgdo import show + >>> show("file.lh5", "/geds/raw") + /geds/raw + ├── channel · array<1>{real} + ├── energy · array<1>{real} + ├── timestamp · array<1>{real} + ├── waveform · table{t0,dt,values} + │ ├── dt · array<1>{real} + │ ├── t0 · array<1>{real} + │ └── values · array_of_equalsized_arrays<1,1>{real} + └── wf_std · array<1>{real} + """ + # open file + if isinstance(lh5_file, str): + lh5_file = h5py.File(expand_path(lh5_file), "r") + + # go to group + if lh5_group != "/": + lh5_file = lh5_file[lh5_group] + + if header: + print(f"\033[1m{lh5_group}\033[0m") # noqa: T201 + + # get an iterator over the keys in the group + it = iter(lh5_file) + key = None + + # make sure there is actually something in this file/group + try: + key = next(it) # get first key + except StopIteration: + print(f"{indent}└── empty") # noqa: T201 + return + + # loop over keys + while True: + val = lh5_file[key] + # we want to print the LGDO datatype + dtype = val.attrs.get("datatype", default="no datatype") + if dtype == "no datatype" and isinstance(val, h5py.Group): + dtype = "HDF5 group" + + _attrs = "" + if attrs: + attrs_d = dict(val.attrs) + attrs_d.pop("datatype", "") + _attrs = "── " + str(attrs_d) if attrs_d else "" + + # is this the last key? 
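+        # (peek ahead: the final entry gets a "└──" connector and closes the
+        # branch, every other entry gets "├──")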
+ killme = False + try: + k_new = next(it) # get next key + except StopIteration: + char = "└──" + killme = True # we'll have to kill this loop later + else: + char = "├──" + + print(f"{indent}{char} \033[1m{key}\033[0m · {dtype} {_attrs}") # noqa: T201 + + # if it's a group, call this function recursively + if isinstance(val, h5py.Group): + show( + val, + indent=indent + (" " if killme else "│ "), + header=False, + attrs=attrs, + ) + + # break or move to next key + if killme: + break + else: + key = k_new + + +def load_nda( + f_list: str | list[str], + par_list: list[str], + lh5_group: str = "", + idx_list: list[np.ndarray | list | tuple] = None, +) -> dict[str, np.ndarray]: + r"""Build a dictionary of :class:`numpy.ndarray`\ s from LH5 data. + + Given a list of files, a list of LH5 table parameters, and an optional + group path, return a NumPy array with all values for each parameter. + + Parameters + ---------- + f_list + A list of files. Can contain wildcards. + par_list + A list of parameters to read from each file. + lh5_group + group path within which to find the specified parameters. + idx_list + for fancy-indexed reads. Must be one index array for each file in + `f_list`. + + Returns + ------- + par_data + A dictionary of the parameter data keyed by the elements of `par_list`. + Each entry contains the data for the specified parameter concatenated + over all files in `f_list`. + """ + if isinstance(f_list, str): + f_list = [f_list] + if idx_list is not None: + idx_list = [idx_list] + if idx_list is not None and len(f_list) != len(idx_list): + raise ValueError( + f"f_list length ({len(f_list)}) != idx_list length ({len(idx_list)})!" + ) + + # Expand wildcards + f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))] + + sto = LH5Store() + par_data = {par: [] for par in par_list} + for ii, f in enumerate(f_list): + f = sto.gimme_file(f, "r") + for par in par_list: + if f"{lh5_group}/{par}" not in f: + raise RuntimeError(f"'{lh5_group}/{par}' not in file {f_list[ii]}") + + if idx_list is None: + data, _ = sto.read(f"{lh5_group}/{par}", f) + else: + data, _ = sto.read(f"{lh5_group}/{par}", f, idx=idx_list[ii]) + if not data: + continue + par_data[par].append(data.nda) + par_data = {par: np.concatenate(par_data[par]) for par in par_list} + return par_data + + +def load_dfs( + f_list: str | list[str], + par_list: list[str], + lh5_group: str = "", + idx_list: list[np.ndarray | list | tuple] = None, +) -> pd.DataFrame: + """Build a :class:`pandas.DataFrame` from LH5 data. + + Given a list of files (can use wildcards), a list of LH5 columns, and + optionally the group path, return a :class:`pandas.DataFrame` with all + values for each parameter. + + See Also + -------- + :func:`load_nda` + + Returns + ------- + dataframe + contains columns for each parameter in `par_list`, and rows containing + all data for the associated parameters concatenated over all files in + `f_list`. 
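+
+    Examples
+    --------
+    A minimal sketch (file, group and column names are placeholders):
+
+    >>> df = load_dfs("file.lh5", ["energy", "timestamp"], lh5_group="geds/raw")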
+ """ + return pd.DataFrame( + load_nda(f_list, par_list, lh5_group=lh5_group, idx_list=idx_list) + ) + + +@nb.njit(parallel=False, fastmath=True) +def _make_fd_idx(starts, stops, idx): + k = 0 + if len(starts) < len(stops): + for i in range(stops[0]): + idx[k] = i + k += 1 + stops = stops[1:] + for j in range(len(starts)): + for i in range(starts[j], stops[j]): + idx[k] = i + k += 1 + return (idx,) diff --git a/src/lgdo/lh5/utils.py b/src/lgdo/lh5/utils.py new file mode 100644 index 00000000..bc1fd425 --- /dev/null +++ b/src/lgdo/lh5/utils.py @@ -0,0 +1,118 @@ +"""Implements utilities for LEGEND Data Objects.""" +from __future__ import annotations + +import glob +import logging +import os +import string + +log = logging.getLogger(__name__) + + +def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]]: + """Parse datatype string and return type, dimensions and elements. + + Parameters + ---------- + datatype + a LGDO-formatted datatype string. + + Returns + ------- + element_type + the datatype name dims if not ``None``, a tuple of dimensions for the + LGDO. Note this is not the same as the NumPy shape of the underlying + data object. See the LGDO specification for more information. Also see + :class:`~.types.ArrayOfEqualSizedArrays` and + :meth:`.lh5_store.LH5Store.read` for example code elements for + numeric objects, the element type for struct-like objects, the list of + fields in the struct. + """ + if "{" not in datatype: + return "scalar", None, datatype + + # for other datatypes, need to parse the datatype string + from parse import parse + + datatype, element_description = parse("{}{{{}}}", datatype) + if datatype.endswith(">"): + datatype, dims = parse("{}<{}>", datatype) + dims = [int(i) for i in dims.split(",")] + return datatype, tuple(dims), element_description + else: + return datatype, None, element_description.split(",") + + +def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: + """Expand (environment) variables. + + Note + ---- + Malformed variable names and references to non-existing variables are left + unchanged. + + Parameters + ---------- + expr + string expression, which may include (environment) variables prefixed by + ``$``. + substitute + use this dictionary to substitute variables. Takes precedence over + environment variables. + """ + if substitute is None: + substitute = {} + + # use provided mapping + # then expand env variables + return os.path.expandvars(string.Template(expr).safe_substitute(substitute)) + + +def expand_path( + path: str, + substitute: dict[str, str] = None, + list: bool = False, + base_path: str = None, +) -> str | list: + """Expand (environment) variables and wildcards to return absolute paths. + + Parameters + ---------- + path + name of path, which may include environment variables and wildcards. + list + if ``True``, return a list. If ``False``, return a string; if ``False`` + and a unique file is not found, raise an exception. + substitute + use this dictionary to substitute variables. Environment variables take + precedence. + base_path + name of base path. Returned paths will be relative to base. 
+ + Returns + ------- + path or list of paths + Unique absolute path, or list of all absolute paths + """ + if base_path is not None and base_path != "": + base_path = os.path.expanduser(os.path.expandvars(base_path)) + path = os.path.join(base_path, path) + + # first expand variables + _path = expand_vars(path, substitute) + + # then expand wildcards + paths = sorted(glob.glob(os.path.expanduser(_path))) + + if base_path is not None and base_path != "": + paths = [os.path.relpath(p, base_path) for p in paths] + + if not list: + if len(paths) == 0: + raise FileNotFoundError(f"could not find path matching {path}") + elif len(paths) > 1: + raise FileNotFoundError(f"found multiple paths matching {path}") + else: + return paths[0] + else: + return paths diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5_store.py index 7103d05c..ce8b72cd 100644 --- a/src/lgdo/lh5_store.py +++ b/src/lgdo/lh5_store.py @@ -1,166 +1,91 @@ -""" -This module implements routines from reading and writing LEGEND Data Objects in -HDF5 files. -""" from __future__ import annotations -import fnmatch -import glob -import logging -import os import sys -from bisect import bisect_left -from collections import defaultdict -from typing import Any, Iterator, Union +from typing import Iterator, Union +from warnings import warn import h5py -import numba as nb import numpy as np import pandas as pd -from . import compression as compress -from .compression import WaveformCodec -from .lgdo_utils import expand_path, parse_datatype -from .types import ( - Array, - ArrayOfEncodedEqualSizedArrays, - ArrayOfEqualSizedArrays, - FixedSizeArray, - Scalar, - Struct, - Table, - VectorOfEncodedVectors, - VectorOfVectors, - WaveformTable, -) - +from . import lh5 +from .types import Array # noqa: F401 +from .types import ArrayOfEncodedEqualSizedArrays # noqa: F401 +from .types import ArrayOfEqualSizedArrays # noqa: F401 +from .types import FixedSizeArray # noqa: F401 +from .types import Scalar # noqa: F401 +from .types import Struct # noqa: F401 +from .types import Table # noqa: F401 +from .types import VectorOfEncodedVectors # noqa: F401 +from .types import VectorOfVectors # noqa: F401 +from .types import WaveformTable # noqa: F401 + +DEFAULT_HDF5_COMPRESSION = None LGDO = Union[Array, Scalar, Struct, VectorOfVectors] - -log = logging.getLogger(__name__) - DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"} -class LH5Store: - """ - Class to represent a store of LEGEND HDF5 files. The two main methods - implemented by the class are :meth:`read_object` and :meth:`write_object`. - - Examples - -------- - >>> from lgdo import LH5Store - >>> store = LH5Store() - >>> obj, _ = store.read_object("/geds/waveform", "file.lh5") - >>> type(obj) - lgdo.waveform_table.WaveformTable - """ - - def __init__(self, base_path: str = "", keep_open: bool = False) -> None: - """ - Parameters - ---------- - base_path - directory path to prepend to LH5 files. - keep_open - whether to keep files open by storing the :mod:`h5py` objects as - class attributes. - """ - self.base_path = "" if base_path == "" else expand_path(base_path) - self.keep_open = keep_open - self.files = {} - - def gimme_file(self, lh5_file: str | h5py.File, mode: str = "r") -> h5py.File: - """Returns a :mod:`h5py` file object from the store or creates a new one. - - Parameters - ---------- - lh5_file - LH5 file name. - mode - mode in which to open file. See :class:`h5py.File` documentation. 
- """ - if isinstance(lh5_file, h5py.File): - return lh5_file - if mode == "r": - lh5_file = expand_path(lh5_file, base_path=self.base_path) - if lh5_file in self.files.keys(): - return self.files[lh5_file] - if self.base_path != "": - full_path = os.path.join(self.base_path, lh5_file) - else: - full_path = lh5_file - if mode != "r": - directory = os.path.dirname(full_path) - if directory != "" and not os.path.exists(directory): - log.debug(f"making path {directory}") - os.makedirs(directory) - if mode == "r" and not os.path.exists(full_path): - raise FileNotFoundError(f"file {full_path} not found") - if mode != "r" and os.path.exists(full_path): - log.debug(f"opening existing file {full_path} in mode '{mode}'") - h5f = h5py.File(full_path, mode) - if self.keep_open: - self.files[lh5_file] = h5f - return h5f - - def gimme_group( +class LH5Iterator(lh5.LH5Iterator): + def __init__( self, - group: str | h5py.Group, - base_group: h5py.Group, - grp_attrs: dict[str, Any] = None, - overwrite: bool = False, - ) -> h5py.Group: - """ - Returns an existing :class:`h5py` group from a base group or creates a - new one. Can also set (or replace) group attributes. - - Parameters - ---------- - group - name of the HDF5 group. - base_group - HDF5 group to be used as a base. - grp_attrs - HDF5 group attributes. - overwrite - whether overwrite group attributes, ignored if `grp_attrs` is - ``None``. - """ - if not isinstance(group, h5py.Group): - if group in base_group: - group = base_group[group] - else: - group = base_group.create_group(group) - if grp_attrs is not None: - group.attrs.update(grp_attrs) - return group - if ( - grp_attrs is not None - and len(set(grp_attrs.items()) ^ set(group.attrs.items())) > 0 - ): - if not overwrite: - raise RuntimeError("grp_attrs != group.attrs but overwrite not set") - else: - log.debug(f"overwriting {group}.attrs...") - for key in group.attrs.keys(): - group.attrs.pop(key) - group.attrs.update(grp_attrs) - return group + lh5_files: str | list[str], + groups: str | list[str], + base_path: str = "", + entry_list: list[int] | list[list[int]] = None, + entry_mask: list[bool] | list[list[bool]] = None, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + buffer_len: int = 3200, + friend: Iterator = None, + ) -> None: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator." + "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + lh5_files, + groups, + base_path, + entry_list, + entry_mask, + field_mask, + buffer_len, + friend, + ) - def get_buffer( + def write_object( self, + obj: LGDO, name: str, - lh5_file: str | h5py.File | list[str | h5py.File], - size: int = None, - field_mask: dict[str, bool] | list[str] | tuple[str] = None, - ) -> LGDO: - """Returns an LH5 object appropriate for use as a pre-allocated buffer - in a read loop. Sets size to `size` if object has a size. - """ - obj, n_rows = self.read_object(name, lh5_file, n_rows=0, field_mask=field_mask) - if hasattr(obj, "resize") and size is not None: - obj.resize(new_size=size) - return obj + lh5_file: str | h5py.File, + group: str | h5py.Group = "/", + start_row: int = 0, + n_rows: int = None, + wo_mode: str = "append", + write_start: int = 0, + **h5py_kwargs, + ) -> None: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. 
" + "The object you are calling this function from uses the old LH5Iterator class." + "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + self.write( + obj, + name, + lh5_file, + group, + start_row, + n_rows, + wo_mode, + write_start, + h5py_kwargs, + ) def read_object( self, @@ -169,1165 +94,85 @@ def read_object( start_row: int = 0, n_rows: int = sys.maxsize, idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None, - use_h5idx: bool = False, field_mask: dict[str, bool] | list[str] | tuple[str] = None, obj_buf: LGDO = None, obj_buf_start: int = 0, decompress: bool = True, ) -> tuple[LGDO, int]: - """Read LH5 object data from a file. - - Use the ``idx`` parameter to read out particular rows of the data. The ``use_h5idx`` flag - controls whether *only* those rows are read from disk or if the rows are indexed after reading - the entire object. Reading individual rows can be orders of magnitude slower than reading - the whole object and then indexing the desired rows. The default behavior (``use_h5idx=False``) - is to use slightly more memory for a much faster read. See - `legend-pydataobj #29 `_ - for additional information. - - Parameters - ---------- - name - Name of the LH5 object to be read (including its group path). - lh5_file - The file(s) containing the object to be read out. If a list of - files, array-like object data will be concatenated into the output - object. - start_row - Starting entry for the object read (for array-like objects). For a - list of files, only applies to the first file. - n_rows - The maximum number of rows to read (for array-like objects). The - actual number of rows read will be returned as one of the return - values (see below). - idx - For NumPy-style "fancying indexing" for the read to select only some - rows, e.g. after applying some cuts to particular columns. - Only selection along the first axis is supported, so tuple arguments - must be one-tuples. If `n_rows` is not false, `idx` will be truncated to - `n_rows` before reading. To use with a list of files, can pass in a list of - `idx`'s (one for each file) or use a long contiguous list (e.g. built from a previous - identical read). If used in conjunction with `start_row` and `n_rows`, - will be sliced to obey those constraints, where `n_rows` is - interpreted as the (max) number of *selected* values (in `idx`) to be - read out. Note that the ``use_h5idx`` parameter controls some behaviour of the - read and that the default behavior (``use_h5idx=False``) prioritizes speed over - a small memory penalty. - use_h5idx - ``True`` will directly pass the ``idx`` parameter to the underlying - ``h5py`` call such that only the selected rows are read directly into memory, - which conserves memory at the cost of speed. There can be a significant penalty - to speed for larger files (1 - 2 orders of magnitude longer time). - ``False`` (default) will read the entire object into memory before - performing the indexing. The default is much faster but requires additional memory, - though a relatively small amount in the typical use case. It is recommended to - leave this parameter as its default. - field_mask - For tables and structs, determines which fields get written out. - Only applies to immediate fields of the requested objects. If a dict - is used, a default dict will be made with the default set to the - opposite of the first element in the dict. 
This way if one specifies - a few fields at ``False``, all but those fields will be read out, - while if one specifies just a few fields as ``True``, only those - fields will be read out. If a list is provided, the listed fields - will be set to ``True``, while the rest will default to ``False``. - obj_buf - Read directly into memory provided in `obj_buf`. Note: the buffer - will be expanded to accommodate the data requested. To maintain the - buffer length, send in ``n_rows = len(obj_buf)``. - obj_buf_start - Start location in ``obj_buf`` for read. For concatenating data to - array-like objects. - decompress - Decompress data encoded with LGDO's compression routines right - after reading. The option has no effect on data encoded with HDF5 - built-in filters, which is always decompressed upstream by HDF5. - - - Returns - ------- - (object, n_rows_read) - `object` is the read-out object `n_rows_read` is the number of rows - successfully read out. Essential for arrays when the amount of data - is smaller than the object buffer. For scalars and structs - `n_rows_read` will be``1``. For tables it is redundant with - ``table.loc``. - """ - # Handle list-of-files recursively - if not isinstance(lh5_file, (str, h5py.File)): - lh5_file = list(lh5_file) - n_rows_read = 0 - - # to know whether we are reading in a list of files. - # this is part of the fix for reading data by idx - # (see https://github.com/legend-exp/legend-pydataobj/issues/29) - # so that we only make a copy of the data if absolutely necessary - # or if we can read the data from file without having to make a copy - self.in_file_loop = True - - for i, h5f in enumerate(lh5_file): - if isinstance(idx, list) and len(idx) > 0 and not np.isscalar(idx[0]): - # a list of lists: must be one per file - idx_i = idx[i] - elif idx is not None: - # make idx a proper tuple if it's not one already - if not (isinstance(idx, tuple) and len(idx) == 1): - idx = (idx,) - # idx is a long continuous array - n_rows_i = self.read_n_rows(name, h5f) - # find the length of the subset of idx that contains indices - # that are less than n_rows_i - n_rows_to_read_i = bisect_left(idx[0], n_rows_i) - # now split idx into idx_i and the remainder - idx_i = (idx[0][:n_rows_to_read_i],) - idx = (idx[0][n_rows_to_read_i:] - n_rows_i,) - else: - idx_i = None - n_rows_i = n_rows - n_rows_read - - # maybe someone passed in a list of len==1? - if i == (len(lh5_file) - 1): - self.in_file_loop = False - - obj_buf, n_rows_read_i = self.read_object( - name, - lh5_file[i], - start_row=start_row, - n_rows=n_rows_i, - idx=idx_i, - use_h5idx=use_h5idx, - field_mask=field_mask, - obj_buf=obj_buf, - obj_buf_start=obj_buf_start, - decompress=decompress, - ) - - n_rows_read += n_rows_read_i - if n_rows_read >= n_rows or obj_buf is None: - return obj_buf, n_rows_read - start_row = 0 - obj_buf_start += n_rows_read_i - - self.in_file_loop = False - - return obj_buf, n_rows_read - - # get the file from the store - h5f = self.gimme_file(lh5_file, "r") - if not h5f or name not in h5f: - raise KeyError(f"'{name}' not in {h5f.filename}") - - log.debug( - f"reading {h5f.filename}:{name}[{start_row}:{n_rows}], decompress = {decompress}, " - + (f" with field mask {field_mask}" if field_mask else "") + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. " + "The object you are calling this function from uses the old LH5Iterator class." + "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." 
+ "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, ) - - # make idx a proper tuple if it's not one already - if not (isinstance(idx, tuple) and len(idx) == 1): - if idx is not None: - idx = (idx,) - - # get the object's datatype - if "datatype" not in h5f[name].attrs: - raise RuntimeError( - f"'{name}' in file {lh5_file} is missing the datatype attribute" - ) - - datatype = h5f[name].attrs["datatype"] - datatype, shape, elements = parse_datatype(datatype) - - # check field_mask and make it a default dict - if datatype == "struct" or datatype == "table": - if field_mask is None: - field_mask = defaultdict(lambda: True) - elif isinstance(field_mask, dict): - default = True - if len(field_mask) > 0: - default = not field_mask[list(field_mask.keys())[0]] - field_mask = defaultdict(lambda: default, field_mask) - elif isinstance(field_mask, (list, tuple)): - field_mask = defaultdict( - lambda: False, {field: True for field in field_mask} - ) - elif not isinstance(field_mask, defaultdict): - raise RuntimeError("bad field_mask of type", type(field_mask).__name__) - elif field_mask is not None: - raise RuntimeError(f"datatype {datatype} does not accept a field_mask") - - # Scalar - # scalars are dim-0 datasets - if datatype == "scalar": - value = h5f[name][()] - if elements == "bool": - value = np.bool_(value) - if obj_buf is not None: - obj_buf.value = value - obj_buf.attrs.update(h5f[name].attrs) - return obj_buf, 1 - else: - return Scalar(value=value, attrs=h5f[name].attrs), 1 - - # Struct - # recursively build a struct, return as a dictionary - if datatype == "struct": - # ignore obj_buf. - # TODO: could append new fields or overwrite/concat to existing - # fields. If implemented, get_buffer() above should probably also - # (optionally?) prep buffers for each field - if obj_buf is not None: - raise NotImplementedError("obj_buf not implemented for LGOD Structs") - - # loop over fields and read - obj_dict = {} - for field in elements: - if not field_mask[field]: - continue - # TODO: it's strange to pass start_row, n_rows, idx to struct - # fields. If they all had shared indexing, they should be in a - # table... Maybe should emit a warning? Or allow them to be - # dicts keyed by field name? - if "int_keys" in h5f[name].attrs: - if dict(h5f[name].attrs)["int_keys"]: - f = int(field) - else: - f = str(field) - obj_dict[f], _ = self.read_object( - name + "/" + field, - h5f, - start_row=start_row, - n_rows=n_rows, - idx=idx, - use_h5idx=use_h5idx, - decompress=decompress, - ) - # modify datatype in attrs if a field_mask was used - attrs = dict(h5f[name].attrs) - if field_mask is not None: - selected_fields = [] - for field in elements: - if field_mask[field]: - selected_fields.append(field) - attrs["datatype"] = "struct" + "{" + ",".join(selected_fields) + "}" - return Struct(obj_dict=obj_dict, attrs=attrs), 1 - - # Below here is all array-like types. 
So trim idx if needed - if idx is not None: - # chop off indices < start_row - i_first_valid = bisect_left(idx[0], start_row) - idxa = idx[0][i_first_valid:] - # don't readout more than n_rows indices - idx = (idxa[:n_rows],) # works even if n_rows > len(idxa) - - # Table or WaveformTable - if datatype == "table": - col_dict = {} - - # read out each of the fields - rows_read = [] - for field in elements: - if not field_mask[field]: - continue - - fld_buf = None - if obj_buf is not None: - if not isinstance(obj_buf, Table) or field not in obj_buf: - raise ValueError( - f"obj_buf for LGDO Table '{name}' not formatted correctly" - ) - - else: - fld_buf = obj_buf[field] - - col_dict[field], n_rows_read = self.read_object( - name + "/" + field, - h5f, - start_row=start_row, - n_rows=n_rows, - idx=idx, - use_h5idx=use_h5idx, - obj_buf=fld_buf, - obj_buf_start=obj_buf_start, - decompress=decompress, - ) - if obj_buf is not None and obj_buf_start + n_rows_read > len(obj_buf): - obj_buf.resize(obj_buf_start + n_rows_read) - - rows_read.append(n_rows_read) - - # warn if all columns don't read in the same number of rows - if len(rows_read) > 0: - n_rows_read = rows_read[0] - else: - n_rows_read = 0 - log.warning(f"Table '{name}' has no subgroups accepted by field mask") - - for n in rows_read[1:]: - if n != n_rows_read: - log.warning( - f"Table '{name}' got strange n_rows_read = {n}, {n_rows_read} was expected ({rows_read})" - ) - - # modify datatype in attrs if a field_mask was used - attrs = dict(h5f[name].attrs) - if field_mask is not None: - selected_fields = [] - for field in elements: - if field_mask[field]: - selected_fields.append(field) - attrs["datatype"] = "table" + "{" + ",".join(selected_fields) + "}" - - # fields have been read out, now return a table - if obj_buf is None: - # if col_dict contains just 3 objects called t0, dt, and values, - # return a WaveformTable - if ( - len(col_dict) == 3 - and "t0" in col_dict - and "dt" in col_dict - and "values" in col_dict - ): - table = WaveformTable( - t0=col_dict["t0"], dt=col_dict["dt"], values=col_dict["values"] - ) - else: - table = Table(col_dict=col_dict, attrs=attrs) - - # set (write) loc to end of tree - table.loc = n_rows_read - return table, n_rows_read - else: - # We have read all fields into the object buffer. Run - # checks: All columns should be the same size. So update - # table's size as necessary, warn if any mismatches are found - obj_buf.resize(do_warn=True) - # set (write) loc to end of tree - obj_buf.loc = obj_buf_start + n_rows_read - # check attributes - if set(obj_buf.attrs.keys()) != set(attrs.keys()): - raise RuntimeError( - f"attrs mismatch. 
obj_buf.attrs: " - f"{obj_buf.attrs}, h5f[{name}].attrs: {attrs}" - ) - return obj_buf, n_rows_read - - # ArrayOfEncodedEqualSizedArrays and VectorOfEncodedVectors - for cond, enc_lgdo in [ - ( - datatype == "array_of_encoded_equalsized_arrays", - ArrayOfEncodedEqualSizedArrays, - ), - (elements.startswith("encoded_array"), VectorOfEncodedVectors), - ]: - if cond: - if ( - not decompress - and obj_buf is not None - and not isinstance(obj_buf, enc_lgdo) - ): - raise ValueError(f"obj_buf for '{name}' not a {enc_lgdo}") - - # read out decoded_size, either a Scalar or an Array - decoded_size_buf = encoded_data_buf = None - if obj_buf is not None and not decompress: - decoded_size_buf = obj_buf.decoded_size - encoded_data_buf = obj_buf.encoded_data - - decoded_size, _ = self.read_object( - f"{name}/decoded_size", - h5f, - start_row=start_row, - n_rows=n_rows, - idx=idx, - use_h5idx=use_h5idx, - obj_buf=None if decompress else decoded_size_buf, - obj_buf_start=0 if decompress else obj_buf_start, - ) - - # read out encoded_data, a VectorOfVectors - encoded_data, n_rows_read = self.read_object( - f"{name}/encoded_data", - h5f, - start_row=start_row, - n_rows=n_rows, - idx=idx, - use_h5idx=use_h5idx, - obj_buf=None if decompress else encoded_data_buf, - obj_buf_start=0 if decompress else obj_buf_start, - ) - - # return the still encoded data in the buffer object, if there - if obj_buf is not None and not decompress: - return obj_buf, n_rows_read - - # otherwise re-create the encoded LGDO - rawdata = enc_lgdo( - encoded_data=encoded_data, - decoded_size=decoded_size, - attrs=h5f[name].attrs, - ) - - # already return if no decompression is requested - if not decompress: - return rawdata, n_rows_read - - # if no buffer, decode and return - elif obj_buf is None and decompress: - return compress.decode(rawdata), n_rows_read - - # eventually expand provided obj_buf, if too short - buf_size = obj_buf_start + n_rows_read - if len(obj_buf) < buf_size: - obj_buf.resize(buf_size) - - # use the (decoded object type) buffer otherwise - if enc_lgdo == ArrayOfEncodedEqualSizedArrays: - if not isinstance(obj_buf, ArrayOfEqualSizedArrays): - raise ValueError( - f"obj_buf for decoded '{name}' not an ArrayOfEqualSizedArrays" - ) - - compress.decode(rawdata, obj_buf[obj_buf_start:buf_size]) - - elif enc_lgdo == VectorOfEncodedVectors: - if not isinstance(obj_buf, VectorOfVectors): - raise ValueError( - f"obj_buf for decoded '{name}' not a VectorOfVectors" - ) - - # FIXME: not a good idea. 
an in place decoding version - # of decode would be needed to avoid extra memory - # allocations - for i, wf in enumerate(compress.decode(rawdata)): - obj_buf[obj_buf_start + i] = wf - - return obj_buf, n_rows_read - - # VectorOfVectors - # read out vector of vectors of different size - if elements.startswith("array"): - if obj_buf is not None and not isinstance(obj_buf, VectorOfVectors): - raise ValueError(f"obj_buf for '{name}' not a LGDO VectorOfVectors") - - # read out cumulative_length - cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length - cumulative_length, n_rows_read = self.read_object( - f"{name}/cumulative_length", - h5f, - start_row=start_row, - n_rows=n_rows, - idx=idx, - use_h5idx=use_h5idx, - obj_buf=cumulen_buf, - obj_buf_start=obj_buf_start, - ) - # get a view of just what was read out for cleaner code below - this_cumulen_nda = cumulative_length.nda[ - obj_buf_start : obj_buf_start + n_rows_read - ] - - if idx is not None and n_rows_read > 0: - # get the starting indices for each array in flattended data: - # the starting index for array[i] is cumulative_length[i-1] - idx2 = (np.asarray(idx[0]).copy() - 1,) - # re-read cumulative_length with these indices - # note this will allocate memory for fd_starts! - fd_start = None - if idx2[0][0] == -1: - idx2 = (idx2[0][1:],) - fd_start = 0 # this variable avoids an ndarray append - fd_starts, fds_n_rows_read = self.read_object( - f"{name}/cumulative_length", - h5f, - start_row=start_row, - n_rows=n_rows, - idx=idx2, - use_h5idx=use_h5idx, - ) - fd_starts = fd_starts.nda # we just need the nda - if fd_start is None: - fd_start = fd_starts[0] - - # compute the length that flattened_data will have after the - # fancy-indexed read - fd_n_rows = np.sum(this_cumulen_nda[-len(fd_starts) :] - fd_starts) - if fd_start == 0: - fd_n_rows += this_cumulen_nda[0] - - # now make fd_idx - fd_idx = np.empty(fd_n_rows, dtype="uint32") - fd_idx = _make_fd_idx(fd_starts, this_cumulen_nda, fd_idx) - - # Now clean up this_cumulen_nda, to be ready - # to match the in-memory version of flattened_data. Note: these - # operations on the view change the original array because they are - # numpy arrays, not lists. - this_cumulen_nda[-len(fd_starts) :] -= fd_starts - np.cumsum(this_cumulen_nda, out=this_cumulen_nda) - - else: - fd_idx = None - - # determine the start_row and n_rows for the flattened_data readout - fd_start = 0 - if start_row > 0 and n_rows_read > 0: - # need to read out the cumulen sample -before- the first sample - # read above in order to get the starting row of the first - # vector to read out in flattened_data - fd_start = h5f[f"{name}/cumulative_length"][start_row - 1] - - # check limits for values that will be used subsequently - if this_cumulen_nda[-1] < fd_start: - log.debug( - f"this_cumulen_nda[-1] = {this_cumulen_nda[-1]}, " - f"fd_start = {fd_start}, " - f"start_row = {start_row}, " - f"n_rows_read = {n_rows_read}" - ) - raise RuntimeError( - f"cumulative_length non-increasing between entries " - f"{start_row} and {start_row+n_rows_read} ??" - ) - - # determine the number of rows for the flattened_data readout - fd_n_rows = this_cumulen_nda[-1] if n_rows_read > 0 else 0 - - # Now done with this_cumulen_nda, so we can clean it up to be ready - # to match the in-memory version of flattened_data. Note: these - # operations on the view change the original array because they are - # numpy arrays, not lists. 
- # - # First we need to subtract off the in-file offset for the start of - # read for flattened_data - this_cumulen_nda -= fd_start - - # If we started with a partially-filled buffer, add the - # appropriate offset for the start of the in-memory flattened - # data for this read. - fd_buf_start = np.uint32(0) - if obj_buf_start > 0: - fd_buf_start = cumulative_length.nda[obj_buf_start - 1] - this_cumulen_nda += fd_buf_start - - # Now prepare the object buffer if necessary - fd_buf = None - if obj_buf is not None: - fd_buf = obj_buf.flattened_data - # grow fd_buf if necessary to hold the data - fdb_size = fd_buf_start + fd_n_rows - if len(fd_buf) < fdb_size: - fd_buf.resize(fdb_size) - - # now read - flattened_data, dummy_rows_read = self.read_object( - f"{name}/flattened_data", - h5f, - start_row=fd_start, - n_rows=fd_n_rows, - idx=fd_idx, - use_h5idx=use_h5idx, - obj_buf=fd_buf, - obj_buf_start=fd_buf_start, - ) - if obj_buf is not None: - return obj_buf, n_rows_read - return ( - VectorOfVectors( - flattened_data=flattened_data, - cumulative_length=cumulative_length, - attrs=h5f[name].attrs, - ), - n_rows_read, - ) - - # Array - # FixedSizeArray - # ArrayOfEqualSizedArrays - # read out all arrays by slicing - if "array" in datatype: - if obj_buf is not None: - if not isinstance(obj_buf, Array): - raise ValueError(f"obj_buf for '{name}' not an LGDO Array") - obj_buf = None - - # compute the number of rows to read - # we culled idx above for start_row and n_rows, now we have to apply - # the constraint of the length of the dataset - ds_n_rows = h5f[name].shape[0] - if idx is not None: - if len(idx[0]) > 0 and idx[0][-1] >= ds_n_rows: - log.warning( - "idx indexed past the end of the array in the file. Culling..." - ) - n_rows_to_read = bisect_left(idx[0], ds_n_rows) - idx = (idx[0][:n_rows_to_read],) - if len(idx[0]) == 0: - log.warning("idx empty after culling.") - n_rows_to_read = len(idx[0]) - else: - n_rows_to_read = ds_n_rows - start_row - if n_rows_to_read > n_rows: - n_rows_to_read = n_rows - - # if idx is passed, check if we can make it a slice instead (faster) - change_idx_to_slice = False - - # prepare the selection for the read. Use idx if available - if idx is not None: - # check if idx is empty and convert to slice instead - if len(idx[0]) == 0: - source_sel = np.s_[0:0] - change_idx_to_slice = True - # check if idx is contiguous and increasing - # if so, convert it to a slice instead (faster) - elif np.all(np.diff(idx[0]) == 1): - source_sel = np.s_[idx[0][0] : idx[0][-1] + 1] - change_idx_to_slice = True - else: - source_sel = idx - else: - source_sel = np.s_[start_row : start_row + n_rows_to_read] - - # Now read the array - if obj_buf is not None and n_rows_to_read > 0: - buf_size = obj_buf_start + n_rows_to_read - if len(obj_buf) < buf_size: - obj_buf.resize(buf_size) - dest_sel = np.s_[obj_buf_start:buf_size] - - # this is required to make the read of multiple files faster - # until a better solution found. 
- if change_idx_to_slice or idx is None or use_h5idx: - h5f[name].read_direct(obj_buf.nda, source_sel, dest_sel) - else: - # it is faster to read the whole object and then do fancy indexing - obj_buf.nda[dest_sel] = h5f[name][...][source_sel] - - nda = obj_buf.nda - else: - if n_rows == 0: - tmp_shape = (0,) + h5f[name].shape[1:] - nda = np.empty(tmp_shape, h5f[name].dtype) - else: - if change_idx_to_slice or idx is None or use_h5idx: - nda = h5f[name][source_sel] - else: - # it is faster to read the whole object and then do fancy indexing - nda = h5f[name][...][source_sel] - - # if reading a list of files recursively, this is given to obj_buf on - # the first file read. obj_buf needs to be resized and therefore - # it needs to hold the data itself (not a view of the data). - # a view is returned by the source_sel indexing, which cannot be resized - # by ndarray.resize(). - if hasattr(self, "in_file_loop") and self.in_file_loop: - nda = np.copy(nda) - - # special handling for bools - # (c and Julia store as uint8 so cast to bool) - if elements == "bool": - nda = nda.astype(np.bool_) - - # Finally, set attributes and return objects - attrs = h5f[name].attrs - if obj_buf is None: - if datatype == "array": - return Array(nda=nda, attrs=attrs), n_rows_to_read - if datatype == "fixedsize_array": - return FixedSizeArray(nda=nda, attrs=attrs), n_rows_to_read - if datatype == "array_of_equalsized_arrays": - return ( - ArrayOfEqualSizedArrays(nda=nda, dims=shape, attrs=attrs), - n_rows_to_read, - ) - else: - if set(obj_buf.attrs.keys()) != set(attrs.keys()): - raise RuntimeError( - f"attrs mismatch. " - f"obj_buf.attrs: {obj_buf.attrs}, " - f"h5f[{name}].attrs: {attrs}" - ) - return obj_buf, n_rows_to_read - - raise RuntimeError("don't know how to read datatype {datatype}") - - def write_object( - self, - obj: LGDO, - name: str, - lh5_file: str | h5py.File, - group: str | h5py.Group = "/", - start_row: int = 0, - n_rows: int = None, - wo_mode: str = "append", - write_start: int = 0, - **h5py_kwargs, - ) -> None: - """Write an LGDO into an LH5 file. - - If the `obj` :class:`.LGDO` has a `compression` attribute, its value is - interpreted as the algorithm to be used to compress `obj` before - writing to disk. The type of `compression` can be: - - string, kwargs dictionary, hdf5plugin filter - interpreted as the name of a built-in or custom `HDF5 compression - filter `_ - (``"gzip"``, ``"lzf"``, :mod:`hdf5plugin` filter object etc.) and - passed directly to :meth:`h5py.Group.create_dataset`. - - :class:`.WaveformCodec` object - If `obj` is a :class:`.WaveformTable` and ``obj.values`` holds the - attribute, compress ``values`` using this algorithm. More - documentation about the supported waveform compression algorithms at - :mod:`.lgdo.compression`. - - If the `obj` :class:`.LGDO` has a `hdf5_settings` attribute holding a - dictionary, it is interpreted as a list of keyword arguments to be - forwarded directly to :meth:`h5py.Group.create_dataset` (exactly like - the first format of `compression` above). This is the preferred way to - specify HDF5 dataset options such as chunking etc. If compression - options are specified, they take precedence over those set with the - `compression` attribute. - - Note - ---- - The `compression` LGDO attribute takes precedence over the default HDF5 - compression settings. The `hdf5_settings` attribute takes precedence - over `compression`. These attributes are not written to disk. 
- - Note - ---- - HDF5 compression is skipped for the `encoded_data.flattened_data` - dataset of :class:`.VectorOfEncodedVectors` and - :class:`.ArrayOfEncodedEqualSizedArrays`. - - Parameters - ---------- - obj - LH5 object. if object is array-like, writes `n_rows` starting from - `start_row` in `obj`. - name - name of the object in the output HDF5 file. - lh5_file - HDF5 file name or :class:`h5py.File` object. - group - HDF5 group name or :class:`h5py.Group` object in which `obj` should - be written. - start_row - first row in `obj` to be written. - n_rows - number of rows in `obj` to be written. - wo_mode - - ``write_safe`` or ``w``: only proceed with writing if the - object does not already exist in the file. - - ``append`` or ``a``: append along axis 0 (the first dimension) - of array-like objects and array-like subfields of structs. - :class:`~.lgdo.scalar.Scalar` objects get overwritten. - - ``overwrite`` or ``o``: replace data in the file if present, - starting from `write_start`. Note: overwriting with `write_start` = - end of array is the same as ``append``. - - ``overwrite_file`` or ``of``: delete file if present prior to - writing to it. `write_start` should be 0 (its ignored). - - ``append_column`` or ``ac``: append columns from an :class:`~.lgdo.table.Table` - `obj` only if there is an existing :class:`~.lgdo.table.Table` in the `lh5_file` with - the same `name` and :class:`~.lgdo.table.Table.size`. If the sizes don't match, - or if there are matching fields, it errors out. - write_start - row in the output file (if already existing) to start overwriting - from. - **h5py_kwargs - additional keyword arguments forwarded to - :meth:`h5py.Group.create_dataset` to specify, for example, an HDF5 - compression filter to be applied before writing non-scalar - datasets. **Note: `compression` Ignored if compression is specified - as an `obj` attribute.** - """ - log.debug( - f"writing {repr(obj)}[{start_row}:{n_rows}] as " - f"{lh5_file}:{group}/{name}[{write_start}:], " - f"mode = {wo_mode}, h5py_kwargs = {h5py_kwargs}" + return self.read( + name, + lh5_file, + start_row, + n_rows, + idx, + field_mask, + obj_buf, + obj_buf_start, + decompress, ) - if wo_mode == "write_safe": - wo_mode = "w" - if wo_mode == "append": - wo_mode = "a" - if wo_mode == "overwrite": - wo_mode = "o" - if wo_mode == "overwrite_file": - wo_mode = "of" - write_start = 0 - if wo_mode == "append_column": - wo_mode = "ac" - if wo_mode not in ["w", "a", "o", "of", "ac"]: - raise ValueError(f"unknown wo_mode '{wo_mode}'") - - # "mode" is for the h5df.File and wo_mode is for this function - # In hdf5, 'a' is really "modify" -- in addition to appending, you can - # change any object in the file. So we use file:append for - # write_object:overwrite. - mode = "w" if wo_mode == "of" else "a" - lh5_file = self.gimme_file(lh5_file, mode=mode) - group = self.gimme_group(group, lh5_file) - if wo_mode == "w" and name in group: - raise RuntimeError(f"can't overwrite '{name}' in wo_mode 'write_safe'") - - # struct or table or waveform table - if isinstance(obj, Struct): - # In order to append a column, we need to update the `table{old_fields}` value in `group.attrs['datatype"]` to include the new fields. - # One way to do this is to override `obj.attrs["datatype"]` to include old and new fields. Then we can write the fields to the table as normal. 
- if wo_mode == "ac": - old_group = self.gimme_group(name, group) - datatype, shape, fields = parse_datatype(old_group.attrs["datatype"]) - if datatype not in ["table", "struct"]: - raise RuntimeError( - f"Trying to append columns to an object of type {datatype}" - ) - - # If the mode is `append_column`, make sure we aren't appending a table that has a column of the same name as in the existing table - # Also make sure that the field we are adding has the same size - if len(list(set(fields).intersection(set(obj.keys())))) != 0: - raise ValueError( - f"Can't append {list(set(fields).intersection(set(obj.keys())))} column(s) to a table with the same field(s)" - ) - # It doesn't matter what key we access, as all fields in the old table have the same size - if old_group[list(old_group.keys())[0]].size != obj.size: - raise ValueError( - f"Table sizes don't match. Trying to append column of size {obj.size} to a table of size {old_group[list(old_group.keys())[0]].size}." - ) - - # Now we can append the obj.keys() to the old fields, and then update obj.attrs. - fields.extend(list(obj.keys())) - obj.attrs.pop("datatype") - obj.attrs["datatype"] = "table" + "{" + ",".join(fields) + "}" - - group = self.gimme_group( - name, - group, - grp_attrs=obj.attrs, - overwrite=(wo_mode in ["o", "ac"]), - ) - # If the mode is overwrite, then we need to peek into the file's table's existing fields - # If we are writing a new table to the group that does not contain an old field, we should delete that old field from the file - if wo_mode == "o": - # Find the old keys in the group that are not present in the new table's keys, then delete them - for key in list(set(group.keys()) - set(obj.keys())): - log.debug(f"{key} is not present in new table, deleting field") - del group[key] - - for field in obj.keys(): - # eventually compress waveform table values with LGDO's - # custom codecs before writing - # if waveformtable.values.attrs["compression"] is NOT a - # WaveformCodec, just leave it there - obj_fld = None - if ( - isinstance(obj, WaveformTable) - and field == "values" - and not isinstance(obj.values, VectorOfEncodedVectors) - and not isinstance(obj.values, ArrayOfEncodedEqualSizedArrays) - and "compression" in obj.values.attrs - and isinstance(obj.values.attrs["compression"], WaveformCodec) - ): - codec = obj.values.attrs["compression"] - obj_fld = compress.encode(obj.values, codec=codec) - else: - obj_fld = obj[field] - - # Convert keys to string for dataset names - f = str(field) - self.write_object( - obj_fld, - f, - lh5_file, - group=group, - start_row=start_row, - n_rows=n_rows, - wo_mode=wo_mode, - write_start=write_start, - **h5py_kwargs, - ) - return - - # scalars - elif isinstance(obj, Scalar): - if name in group: - if wo_mode in ["o", "a"]: - log.debug(f"overwriting {name} in {group}") - del group[name] - else: - raise RuntimeError( - f"tried to overwrite {name} in {group} for wo_mode {wo_mode}" - ) - ds = group.create_dataset(name, shape=(), data=obj.value) - ds.attrs.update(obj.attrs) - return - - # vector of encoded vectors - elif isinstance(obj, (VectorOfEncodedVectors, ArrayOfEncodedEqualSizedArrays)): - group = self.gimme_group( - name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o") - ) - - # ask not to further compress flattened_data, it is already compressed! 
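The attribute set just below tells the array-writing branch further down not to stack an HDF5 filter on top of the LGDO-encoded bytes. Roughly, as a sketch (with ``DEFAULT_HDF5_SETTINGS`` assumed to hold something like ``{"compression": "gzip"}``):

>>> h5py_kwargs = dict(DEFAULT_HDF5_SETTINGS)
>>> h5py_kwargs["compression"] = None  # the flattened_data attribute overrides the default
>>> # group.create_dataset(name, data=nda, **h5py_kwargs) then writes without compression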
- obj.encoded_data.flattened_data.attrs["compression"] = None - - self.write_object( - obj.encoded_data, - "encoded_data", - lh5_file, - group=group, - start_row=start_row, - n_rows=n_rows, - wo_mode=wo_mode, - write_start=write_start, - **h5py_kwargs, - ) - - self.write_object( - obj.decoded_size, - "decoded_size", - lh5_file, - group=group, - start_row=start_row, - n_rows=n_rows, - wo_mode=wo_mode, - write_start=write_start, - **h5py_kwargs, - ) - - # vector of vectors - elif isinstance(obj, VectorOfVectors): - group = self.gimme_group( - name, group, grp_attrs=obj.attrs, overwrite=(wo_mode == "o") - ) - if ( - n_rows is None - or n_rows > obj.cumulative_length.nda.shape[0] - start_row - ): - n_rows = obj.cumulative_length.nda.shape[0] - start_row - # if appending we need to add an appropriate offset to the - # cumulative lengths as appropriate for the in-file object - offset = 0 # declare here because we have to subtract it off at the end - if (wo_mode == "a" or wo_mode == "o") and "cumulative_length" in group: - len_cl = len(group["cumulative_length"]) - if wo_mode == "a": - write_start = len_cl - if len_cl > 0: - offset = group["cumulative_length"][write_start - 1] - - # First write flattened_data array. Only write rows with data. - fd_start = 0 if start_row == 0 else obj.cumulative_length.nda[start_row - 1] - fd_n_rows = obj.cumulative_length.nda[start_row + n_rows - 1] - fd_start - self.write_object( - obj.flattened_data, - "flattened_data", - lh5_file, - group=group, - start_row=fd_start, - n_rows=fd_n_rows, - wo_mode=wo_mode, - write_start=offset, - **h5py_kwargs, - ) - - # now offset is used to give appropriate in-file values for - # cumulative_length. Need to adjust it for start_row - if start_row > 0: - offset -= obj.cumulative_length.nda[start_row - 1] - - # Add offset to obj.cumulative_length itself to avoid memory allocation. - # Then subtract it off after writing! (otherwise it will be changed - # upon return) - cl_dtype = obj.cumulative_length.nda.dtype.type - obj.cumulative_length.nda += cl_dtype(offset) - - self.write_object( - obj.cumulative_length, - "cumulative_length", - lh5_file, - group=group, - start_row=start_row, - n_rows=n_rows, - wo_mode=wo_mode, - write_start=write_start, - **h5py_kwargs, - ) - obj.cumulative_length.nda -= cl_dtype(offset) - - return - - # if we get this far, must be one of the Array types - elif isinstance(obj, Array): - if n_rows is None or n_rows > obj.nda.shape[0] - start_row: - n_rows = obj.nda.shape[0] - start_row - - nda = obj.nda[start_row : start_row + n_rows] - - # hack to store bools as uint8 for c / Julia compliance - if nda.dtype.name == "bool": - nda = nda.astype(np.uint8) - - # need to create dataset from ndarray the first time for speed - # creating an empty dataset and appending to that is super slow! - if (wo_mode != "a" and write_start == 0) or name not in group: - # this is needed in order to have a resizable (in the first - # axis) data set, i.e. rows can be appended later - # NOTE: this automatically turns chunking on! 
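For reference, the resizable-dataset pattern the note above alludes to, in bare h5py: passing ``maxshape`` makes the first axis extendable, and h5py enables chunking implicitly.

>>> import h5py, numpy as np
>>> with h5py.File("sketch.lh5", "w") as f:
...     ds = f.create_dataset("x", data=np.arange(5), maxshape=(None,))
...     ds.resize(8, axis=0)  # rows can be appended later without recreating the dataset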
- maxshape = (None,) + nda.shape[1:] - h5py_kwargs.setdefault("maxshape", maxshape) - - if wo_mode == "o" and name in group: - log.debug(f"overwriting {name} in {group}") - del group[name] - - # set default compression options - for k, v in DEFAULT_HDF5_SETTINGS.items(): - h5py_kwargs.setdefault(k, v) - - # compress using the 'compression' LGDO attribute, if available - if "compression" in obj.attrs: - comp_algo = obj.attrs["compression"] - if isinstance(comp_algo, dict): - h5py_kwargs |= obj.attrs["compression"] - else: - h5py_kwargs["compression"] = obj.attrs["compression"] - - # and even the 'hdf5_settings' one, preferred - if "hdf5_settings" in obj.attrs: - h5py_kwargs |= obj.attrs["hdf5_settings"] - - # create HDF5 dataset - ds = group.create_dataset(name, data=nda, **h5py_kwargs) - - # attach HDF5 dataset attributes, but not "compression"! - _attrs = obj.getattrs(datatype=True) - _attrs.pop("compression", None) - _attrs.pop("hdf5_settings", None) - ds.attrs.update(_attrs) - return - - # Now append or overwrite - ds = group[name] - if not isinstance(ds, h5py.Dataset): - raise RuntimeError( - f"existing HDF5 object '{name}' in group '{group}'" - " is not a dataset! Cannot overwrite or append" - ) - - old_len = ds.shape[0] - if wo_mode == "a": - write_start = old_len - add_len = write_start + nda.shape[0] - old_len - ds.resize(old_len + add_len, axis=0) - ds[write_start:] = nda - return - - else: - raise RuntimeError( - f"do not know how to write '{name}' of type '{type(obj).__name__}'" - ) - - def read_n_rows(self, name: str, lh5_file: str | h5py.File) -> int | None: - """Look up the number of rows in an Array-like object called `name` in - `lh5_file`. - - Return ``None`` if it is a :class:`.Scalar` or a :class:`.Struct`.""" - # this is basically a stripped down version of read_object - h5f = self.gimme_file(lh5_file, "r") - if not h5f or name not in h5f: - raise KeyError(f"'{name}' not in {lh5_file}") - - # get the datatype - if "datatype" not in h5f[name].attrs: - raise RuntimeError( - f"'{name}' in file {lh5_file} is missing the datatype attribute" - ) - - datatype = h5f[name].attrs["datatype"] - datatype, shape, elements = parse_datatype(datatype) - - # scalars are dim-0 datasets - if datatype == "scalar": - return None - - # structs don't have rows - if datatype == "struct": - return None - - # tables should have elements with all the same length - if datatype == "table": - # read out each of the fields - rows_read = None - for field in elements: - n_rows_read = self.read_n_rows(name + "/" + field, h5f) - if not rows_read: - rows_read = n_rows_read - elif rows_read != n_rows_read: - log.warning( - f"'{field}' field in table '{name}' has {rows_read} rows, " - f"{n_rows_read} was expected" - ) - return rows_read +class LH5Store(lh5.LH5Store): + def __init__(self, base_path: str = "", keep_open: bool = False): + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store. " + "Please replace 'from lgdo.lh5_store import LH5Store' with 'from lgdo.lh5 import LH5Store'." 
+ "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(base_path, keep_open) - # length of vector of vectors is the length of its cumulative_length - if elements.startswith("array"): - return self.read_n_rows(f"{name}/cumulative_length", h5f) - # length of vector of encoded vectors is the length of its decoded_size - if ( - elements.startswith("encoded_array") - or datatype == "array_of_encoded_equalsized_arrays" - ): - return self.read_n_rows(f"{name}/encoded_data", h5f) +def load_dfs( + f_list: str | list[str], + par_list: list[str], + lh5_group: str = "", + idx_list: list[np.ndarray | list | tuple] = None, +) -> pd.DataFrame: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import load_dfs' with 'from lgdo.lh5 import load_dfs'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return lh5.load_dfs(f_list, par_list, lh5_group, idx_list) - # return array length (without reading the array!) - if "array" in datatype: - # compute the number of rows to read - return h5f[name].shape[0] - raise RuntimeError(f"don't know how to read datatype '{datatype}'") +def load_nda( + f_list: str | list[str], + par_list: list[str], + lh5_group: str = "", + idx_list: list[np.ndarray | list | tuple] = None, +) -> dict[str, np.ndarray]: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import load_nda' with 'from lgdo.lh5 import load_nda'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return lh5.load_nda(f_list, par_list, lh5_group, idx_list) def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: - """Return a list of LH5 groups in the input file and group, similar - to ``ls`` or ``h5ls``. Supports wildcards in group names. - - - Parameters - ---------- - lh5_file - name of file. - lh5_group - group to search. add a ``/`` to the end of the group name if you want to - list all objects inside that group. - """ - - log.debug( - f"Listing objects in '{lh5_file}'" - + ("" if lh5_group == "" else f" (and group {lh5_group})") + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import ls' with 'from lgdo.lh5 import ls'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, ) - - lh5_st = LH5Store() - # To use recursively, make lh5_file a h5group instead of a string - if isinstance(lh5_file, str): - lh5_file = lh5_st.gimme_file(lh5_file, "r") - if lh5_group.startswith("/"): - lh5_group = lh5_group[1:] - - if lh5_group == "": - lh5_group = "*" - - splitpath = lh5_group.split("/", 1) - matchingkeys = fnmatch.filter(lh5_file.keys(), splitpath[0]) - - if len(splitpath) == 1: - return matchingkeys - else: - ret = [] - for key in matchingkeys: - ret.extend([f"{key}/{path}" for path in ls(lh5_file[key], splitpath[1])]) - return ret + return lh5.ls(lh5_file, lh5_group) def show( @@ -1337,495 +182,11 @@ def show( indent: str = "", header: bool = True, ) -> None: - """Print a tree of LH5 file contents with LGDO datatype. - - Parameters - ---------- - lh5_file - the LH5 file. - lh5_group - print only contents of this HDF5 group. - attrs - print the HDF5 attributes too. - indent - indent the diagram with this string. - header - print `lh5_group` at the top of the diagram. 
- - Examples - -------- - >>> from lgdo import show - >>> show("file.lh5", "/geds/raw") - /geds/raw - ├── channel · array<1>{real} - ├── energy · array<1>{real} - ├── timestamp · array<1>{real} - ├── waveform · table{t0,dt,values} - │ ├── dt · array<1>{real} - │ ├── t0 · array<1>{real} - │ └── values · array_of_equalsized_arrays<1,1>{real} - └── wf_std · array<1>{real} - """ - # open file - if isinstance(lh5_file, str): - lh5_file = h5py.File(expand_path(lh5_file), "r") - - # go to group - if lh5_group != "/": - lh5_file = lh5_file[lh5_group] - - if header: - print(f"\033[1m{lh5_group}\033[0m") # noqa: T201 - - # get an iterator over the keys in the group - it = iter(lh5_file) - key = None - - # make sure there is actually something in this file/group - try: - key = next(it) # get first key - except StopIteration: - print(f"{indent}└── empty") # noqa: T201 - return - - # loop over keys - while True: - val = lh5_file[key] - # we want to print the LGDO datatype - dtype = val.attrs.get("datatype", default="no datatype") - if dtype == "no datatype" and isinstance(val, h5py.Group): - dtype = "HDF5 group" - - _attrs = "" - if attrs: - attrs_d = dict(val.attrs) - attrs_d.pop("datatype", "") - _attrs = "── " + str(attrs_d) if attrs_d else "" - - # is this the last key? - killme = False - try: - k_new = next(it) # get next key - except StopIteration: - char = "└──" - killme = True # we'll have to kill this loop later - else: - char = "├──" - - print(f"{indent}{char} \033[1m{key}\033[0m · {dtype} {_attrs}") # noqa: T201 - - # if it's a group, call this function recursively - if isinstance(val, h5py.Group): - show( - val, - indent=indent + (" " if killme else "│ "), - header=False, - attrs=attrs, - ) - - # break or move to next key - if killme: - break - else: - key = k_new - - -def load_nda( - f_list: str | list[str], - par_list: list[str], - lh5_group: str = "", - idx_list: list[np.ndarray | list | tuple] = None, -) -> dict[str, np.ndarray]: - r"""Build a dictionary of :class:`numpy.ndarray`\ s from LH5 data. - - Given a list of files, a list of LH5 table parameters, and an optional - group path, return a NumPy array with all values for each parameter. - - Parameters - ---------- - f_list - A list of files. Can contain wildcards. - par_list - A list of parameters to read from each file. - lh5_group - group path within which to find the specified parameters. - idx_list - for fancy-indexed reads. Must be one index array for each file in - `f_list`. - - Returns - ------- - par_data - A dictionary of the parameter data keyed by the elements of `par_list`. - Each entry contains the data for the specified parameter concatenated - over all files in `f_list`. - """ - if isinstance(f_list, str): - f_list = [f_list] - if idx_list is not None: - idx_list = [idx_list] - if idx_list is not None and len(f_list) != len(idx_list): - raise ValueError( - f"f_list length ({len(f_list)}) != idx_list length ({len(idx_list)})!" 
- ) - - # Expand wildcards - f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))] - - sto = LH5Store() - par_data = {par: [] for par in par_list} - for ii, f in enumerate(f_list): - f = sto.gimme_file(f, "r") - for par in par_list: - if f"{lh5_group}/{par}" not in f: - raise RuntimeError(f"'{lh5_group}/{par}' not in file {f_list[ii]}") - - if idx_list is None: - data, _ = sto.read_object(f"{lh5_group}/{par}", f) - else: - data, _ = sto.read_object(f"{lh5_group}/{par}", f, idx=idx_list[ii]) - if not data: - continue - par_data[par].append(data.nda) - par_data = {par: np.concatenate(par_data[par]) for par in par_list} - return par_data - - -def load_dfs( - f_list: str | list[str], - par_list: list[str], - lh5_group: str = "", - idx_list: list[np.ndarray | list | tuple] = None, -) -> pd.DataFrame: - """Build a :class:`pandas.DataFrame` from LH5 data. - - Given a list of files (can use wildcards), a list of LH5 columns, and - optionally the group path, return a :class:`pandas.DataFrame` with all - values for each parameter. - - See Also - -------- - :func:`load_nda` - - Returns - ------- - dataframe - contains columns for each parameter in `par_list`, and rows containing - all data for the associated parameters concatenated over all files in - `f_list`. - """ - return pd.DataFrame( - load_nda(f_list, par_list, lh5_group=lh5_group, idx_list=idx_list) + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import show' with 'from lgdo.lh5 import show'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, ) - - -class LH5Iterator(Iterator): - """ - A class for iterating through one or more LH5 files, one block of entries - at a time. This also accepts an entry list/mask to enable event selection, - and a field mask. - - This class can be used either for random access: - - >>> lh5_obj, n_rows = lh5_it.read(entry) - - to read the block of entries starting at entry. In case of multiple files - or the use of an event selection, entry refers to a global event index - across files and does not count events that are excluded by the selection. - - This can also be used as an iterator: - - >>> for lh5_obj, entry, n_rows in LH5Iterator(...): - >>> # do the thing! - - This is intended for if you are reading a large quantity of data but - want to limit your memory usage (particularly when reading in waveforms!). - The ``lh5_obj`` that is read by this class is reused in order to avoid - reallocation of memory; this means that if you want to hold on to data - between reads, you will have to copy it somewhere! - """ - - def __init__( - self, - lh5_files: str | list[str], - groups: str | list[str], - base_path: str = "", - entry_list: list[int] | list[list[int]] = None, - entry_mask: list[bool] | list[list[bool]] = None, - field_mask: dict[str, bool] | list[str] | tuple[str] = None, - buffer_len: int = 3200, - friend: LH5Iterator = None, - ) -> None: - """ - Parameters - ---------- - lh5_files - file or files to read from. May include wildcards and environment - variables. - groups - HDF5 group(s) to read. If a list is provided for both lh5_files - and group, they must be the same size. If a file is wild-carded, - the same group will be assigned to each file found - entry_list - list of entry numbers to read. If a nested list is provided, - expect one top-level list for each file, containing a list of - local entries. If a list of ints is provided, use global entries. 
- entry_mask - mask of entries to read. If a list of arrays is provided, expect - one for each file. Ignore if a selection list is provided. - field_mask - mask of which fields to read. See :meth:`LH5Store.read_object` for - more details. - buffer_len - number of entries to read at a time while iterating through files. - friend - a ''friend'' LH5Iterator that will be read in parallel with this. - The friend should have the same length and entry list. A single - LH5 table containing columns from both iterators will be returned. - """ - self.lh5_st = LH5Store(base_path=base_path, keep_open=True) - - # List of files, with wildcards and env vars expanded - if isinstance(lh5_files, str): - lh5_files = [lh5_files] - if isinstance(groups, list): - lh5_files *= len(groups) - elif not isinstance(lh5_files, list): - raise ValueError("lh5_files must be a string or list of strings") - - if isinstance(groups, str): - groups = [groups] * len(lh5_files) - elif not isinstance(groups, list): - raise ValueError("group must be a string or list of strings") - - if not len(groups) == len(lh5_files): - raise ValueError("lh5_files and groups must have same length") - - self.lh5_files = [] - self.groups = [] - for f, g in zip(lh5_files, groups): - f_exp = expand_path(f, list=True, base_path=base_path) - self.lh5_files += f_exp - self.groups += [g] * len(f_exp) - - if entry_list is not None and entry_mask is not None: - raise ValueError( - "entry_list and entry_mask arguments are mutually exclusive" - ) - - # Map to last row in each file - self.file_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") - # Map to last iterator entry for each file - self.entry_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") - self.buffer_len = buffer_len - - if len(self.lh5_files) > 0: - f = self.lh5_files[0] - g = self.groups[0] - self.lh5_buffer = self.lh5_st.get_buffer( - g, - f, - size=self.buffer_len, - field_mask=field_mask, - ) - self.file_map[0] = self.lh5_st.read_n_rows(g, f) - else: - raise RuntimeError(f"can't open any files from {lh5_files}") - - self.n_rows = 0 - self.current_entry = 0 - self.next_entry = 0 - - self.field_mask = field_mask - - # List of entry indices from each file - self.local_entry_list = None - self.global_entry_list = None - if entry_list is not None: - entry_list = list(entry_list) - if isinstance(entry_list[0], int): - self.local_entry_list = [None] * len(self.file_map) - self.global_entry_list = np.array(entry_list, "i") - self.global_entry_list.sort() - - else: - self.local_entry_list = [[]] * len(self.file_map) - for i_file, local_list in enumerate(entry_list): - self.local_entry_list[i_file] = np.array(local_list, "i") - self.local_entry_list[i_file].sort() - - elif entry_mask is not None: - # Convert entry mask into an entry list - if isinstance(entry_mask, pd.Series): - entry_mask = entry_mask.values - if isinstance(entry_mask, np.ndarray): - self.local_entry_list = [None] * len(self.file_map) - self.global_entry_list = np.nonzero(entry_mask)[0] - else: - self.local_entry_list = [[]] * len(self.file_map) - for i_file, local_mask in enumerate(entry_mask): - self.local_entry_list[i_file] = np.nonzero(local_mask)[0] - - # Attach the friend - if friend is not None: - if not isinstance(friend, LH5Iterator): - raise ValueError("Friend must be an LH5Iterator") - self.lh5_buffer.join(friend.lh5_buffer) - self.friend = friend - - def _get_file_cumlen(self, i_file: int) -> int: - """Helper to get cumulative file length of file""" - if i_file < 0: - return 0 - fcl = 
self.file_map[i_file] - if fcl == np.iinfo("i").max: - fcl = self._get_file_cumlen(i_file - 1) + self.lh5_st.read_n_rows( - self.groups[i_file], self.lh5_files[i_file] - ) - self.file_map[i_file] = fcl - return fcl - - def _get_file_cumentries(self, i_file: int) -> int: - """Helper to get cumulative iterator entries in file""" - if i_file < 0: - return 0 - n = self.entry_map[i_file] - if n == np.iinfo("i").max: - elist = self.get_file_entrylist(i_file) - fcl = self._get_file_cumlen(i_file) - if elist is None: - # no entry list provided - n = fcl - else: - file_entries = self.get_file_entrylist(i_file) - n = len(file_entries) - # check that file entries fall inside of file - if n > 0 and file_entries[-1] >= fcl: - logging.warning(f"Found entries out of range for file {i_file}") - n = np.searchsorted(file_entries, fcl, "right") - n += self._get_file_cumentries(i_file - 1) - self.entry_map[i_file] = n - return n - - def get_file_entrylist(self, i_file: int) -> np.ndarray: - """Helper to get entry list for file""" - # If no entry list is provided - if self.local_entry_list is None: - return None - - elist = self.local_entry_list[i_file] - if elist is None: - # Get local entrylist for this file from global entry list - f_start = self._get_file_cumlen(i_file - 1) - f_end = self._get_file_cumlen(i_file) - i_start = self._get_file_cumentries(i_file - 1) - i_stop = np.searchsorted(self.global_entry_list, f_end, "right") - elist = np.array(self.global_entry_list[i_start:i_stop], "i") - f_start - self.local_entry_list[i_file] = elist - return elist - - def get_global_entrylist(self) -> np.ndarray: - """Get global entry list, constructing it if needed""" - if self.global_entry_list is None and self.local_entry_list is not None: - self.global_entry_list = np.zeros(len(self), "i") - for i_file in range(len(self.lh5_files)): - i_start = self.get_file_cumentries(i_file - 1) - i_stop = self.get_file_cumentries(i_file) - f_start = self.get_file_cumlen(i_file - 1) - self.global_entry_list[i_start:i_stop] = ( - self.get_file_entrylist(i_file) + f_start - ) - return self.global_entry_list - - def read(self, entry: int) -> tuple[LGDO, int]: - """Read the nextlocal chunk of events, starting at entry. 
Return the - LH5 buffer and number of rows read.""" - self.n_rows = 0 - i_file = np.searchsorted(self.entry_map, entry, "right") - - # if file hasn't been opened yet, search through files - # sequentially until we find the right one - if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("i").max: - while i_file < len(self.lh5_files) and entry >= self._get_file_cumentries( - i_file - ): - i_file += 1 - - if i_file == len(self.lh5_files): - return (self.lh5_buffer, self.n_rows) - local_entry = entry - self._get_file_cumentries(i_file - 1) - - while self.n_rows < self.buffer_len and i_file < len(self.file_map): - # Loop through files - local_idx = self.get_file_entrylist(i_file) - if local_idx is not None and len(local_idx) == 0: - i_file += 1 - local_entry = 0 - continue - - i_local = local_idx[local_entry] if local_idx is not None else local_entry - self.lh5_buffer, n_rows = self.lh5_st.read_object( - self.groups[i_file], - self.lh5_files[i_file], - start_row=i_local, - n_rows=self.buffer_len - self.n_rows, - idx=local_idx, - field_mask=self.field_mask, - obj_buf=self.lh5_buffer, - obj_buf_start=self.n_rows, - ) - - self.n_rows += n_rows - i_file += 1 - local_entry = 0 - - self.current_entry = entry - - if self.friend is not None: - self.friend.read(entry) - - return (self.lh5_buffer, self.n_rows) - - def reset_field_mask(self, mask): - """Replaces the field mask of this iterator and any friends with mask""" - self.field_mask = mask - if self.friend is not None: - self.friend.reset_field_mask(mask) - - def __len__(self) -> int: - """Return the total number of entries.""" - return ( - self._get_file_cumentries(len(self.lh5_files) - 1) - if len(self.entry_map) > 0 - else 0 - ) - - def __iter__(self) -> Iterator: - """Loop through entries in blocks of size buffer_len.""" - self.current_entry = 0 - self.next_entry = 0 - return self - - def __next__(self) -> tuple[LGDO, int, int]: - """Read next buffer_len entries and return lh5_table, iterator entry - and n_rows read.""" - buf, n_rows = self.read(self.next_entry) - self.next_entry = self.current_entry + n_rows - if n_rows == 0: - raise StopIteration - return (buf, self.current_entry, n_rows) - - -@nb.njit(parallel=False, fastmath=True) -def _make_fd_idx(starts, stops, idx): - k = 0 - if len(starts) < len(stops): - for i in range(stops[0]): - idx[k] = i - k += 1 - stops = stops[1:] - for j in range(len(starts)): - for i in range(starts[j], stops[j]): - idx[k] = i - k += 1 - return (idx,) + lh5.show(lh5_file, lh5_group, attrs, indent, header) diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py index 30a47bd2..54fd76f3 100644 --- a/src/lgdo/types/array.py +++ b/src/lgdo/types/array.py @@ -10,7 +10,7 @@ import numpy as np -from .. import lgdo_utils as utils +from .. import utils as utils from .lgdo import LGDO log = logging.getLogger(__name__) diff --git a/src/lgdo/types/arrayofequalsizedarrays.py b/src/lgdo/types/arrayofequalsizedarrays.py index 95884bc9..bf16ed8d 100644 --- a/src/lgdo/types/arrayofequalsizedarrays.py +++ b/src/lgdo/types/arrayofequalsizedarrays.py @@ -9,7 +9,7 @@ import numpy as np -from .. import lgdo_utils as utils +from .. import utils as utils from . import vectorofvectors as vov from .array import Array diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py index 68886273..766001b3 100644 --- a/src/lgdo/types/encoded.py +++ b/src/lgdo/types/encoded.py @@ -6,7 +6,7 @@ import numpy as np from numpy.typing import NDArray -from .. import lgdo_utils as utils +from .. 
import utils as utils from .array import Array from .lgdo import LGDO from .scalar import Scalar diff --git a/src/lgdo/types/scalar.py b/src/lgdo/types/scalar.py index 6b793137..e79bb932 100644 --- a/src/lgdo/types/scalar.py +++ b/src/lgdo/types/scalar.py @@ -7,7 +7,7 @@ import numpy as np -from .. import lgdo_utils as utils +from .. import utils as utils from .lgdo import LGDO log = logging.getLogger(__name__) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index 7d227a52..2b0d7f13 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -13,7 +13,7 @@ import numpy as np from numpy.typing import DTypeLike, NDArray -from .. import lgdo_utils as utils +from .. import utils as utils from . import arrayofequalsizedarrays as aoesa from .array import Array from .lgdo import LGDO diff --git a/src/lgdo/utils.py b/src/lgdo/utils.py new file mode 100644 index 00000000..22866a35 --- /dev/null +++ b/src/lgdo/utils.py @@ -0,0 +1,84 @@ +"""Implements utilities for LEGEND Data Objects.""" +from __future__ import annotations + +import logging + +import numpy as np + +from . import types as lgdo + +log = logging.getLogger(__name__) + + +def get_element_type(obj: object) -> str: + """Get the LGDO element type of a scalar or array. + + For use in LGDO datatype attributes. + + Parameters + ---------- + obj + if a ``str``, will automatically return ``string`` if the object has + a :class:`numpy.dtype`, that will be used for determining the element + type otherwise will attempt to case the type of the object to a + :class:`numpy.dtype`. + + Returns + ------- + element_type + A string stating the determined element type of the object. + """ + + # special handling for strings + if isinstance(obj, str): + return "string" + + # the rest use dtypes + dt = obj.dtype if hasattr(obj, "dtype") else np.dtype(type(obj)) + kind = dt.kind + + if kind == "b": + return "bool" + if kind == "V": + return "blob" + if kind in ["i", "u", "f"]: + return "real" + if kind == "c": + return "complex" + if kind in ["S", "U"]: + return "string" + + # couldn't figure it out + raise ValueError( + "cannot determine lgdo element_type for object of type", type(obj).__name__ + ) + + +def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> lgdo.LGDO: + """Return a copy of an LGDO. + + Parameters + ---------- + obj + the LGDO to be copied. + dtype + NumPy dtype to be used for the copied object. 
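A possible usage sketch for this helper, assuming an integer ``Array`` (the names below are only illustrative):

>>> import numpy as np
>>> from lgdo import Array, utils
>>> a = Array(np.array([1, 2, 3]))
>>> b = utils.copy(a, dtype=np.float32)  # independent copy with the requested dtype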
+ + """ + if dtype is None: + dtype = obj.dtype + + if isinstance(obj, lgdo.Array): + return lgdo.Array( + np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs) + ) + + if isinstance(obj, lgdo.VectorOfVectors): + return lgdo.VectorOfVectors( + flattened_data=copy(obj.flattened_data, dtype=dtype), + cumulative_length=copy(obj.cumulative_length), + attrs=dict(obj.attrs), + ) + + else: + raise ValueError(f"copy of {type(obj)} not supported") diff --git a/tests/compression/conftest.py b/tests/compression/conftest.py index 927ba1ff..e69cc307 100644 --- a/tests/compression/conftest.py +++ b/tests/compression/conftest.py @@ -1,12 +1,12 @@ import pytest -from lgdo import LH5Store +import lgdo.lh5 as lh5 @pytest.fixture() def wftable(lgnd_test_data): - store = LH5Store() - wft, _ = store.read_object( + store = lh5.LH5Store() + wft, _ = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) diff --git a/tests/compression/test_radware_sigcompress.py b/tests/compression/test_radware_sigcompress.py index aacf38f6..fe0bdd99 100644 --- a/tests/compression/test_radware_sigcompress.py +++ b/tests/compression/test_radware_sigcompress.py @@ -2,7 +2,8 @@ import numpy as np -from lgdo import ArrayOfEncodedEqualSizedArrays, ArrayOfEqualSizedArrays, LH5Store +import lgdo.lh5 as lh5 +from lgdo import ArrayOfEncodedEqualSizedArrays, ArrayOfEqualSizedArrays from lgdo.compression.radware import ( _get_hton_u16, _radware_sigcompress_decode, @@ -177,8 +178,8 @@ def test_aoesa(wftable): def test_performance(lgnd_test_data): - store = LH5Store() - obj, _ = store.read_object( + store = lh5.LH5Store() + obj, _ = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) diff --git a/tests/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py similarity index 92% rename from tests/test_lh5_iterator.py rename to tests/lh5/test_lh5_iterator.py index 09297665..95e575af 100644 --- a/tests/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -2,7 +2,7 @@ import pytest import lgdo -from lgdo.lh5_store import LH5Iterator +import lgdo.lh5 as lh5 @pytest.fixture(scope="module") @@ -11,7 +11,7 @@ def lgnd_file(lgnd_test_data): def test_basics(lgnd_file): - lh5_it = LH5Iterator( + lh5_it = lh5.LH5Iterator( lgnd_file, "/geds/raw", entry_list=range(100), @@ -35,14 +35,14 @@ def test_basics(lgnd_file): def test_errors(lgnd_file): with pytest.raises(RuntimeError): - LH5Iterator("non-existent-file.lh5", "random-group") + lh5.LH5Iterator("non-existent-file.lh5", "random-group") with pytest.raises(ValueError): - LH5Iterator(1, 2) + lh5.LH5Iterator(1, 2) def test_lgnd_waveform_table_fancy_idx(lgnd_file): - lh5_it = LH5Iterator( + lh5_it = lh5.LH5Iterator( lgnd_file, "geds/raw/waveform", entry_list=[ @@ -97,13 +97,13 @@ def more_lgnd_files(lgnd_test_data): def test_friend(more_lgnd_files): - lh5_raw_it = LH5Iterator( + lh5_raw_it = lh5.LH5Iterator( more_lgnd_files[0], "ch1084803/raw", field_mask=["waveform", "baseline"], buffer_len=5, ) - lh5_it = LH5Iterator( + lh5_it = lh5.LH5Iterator( more_lgnd_files[1], "ch1084803/hit", field_mask=["is_valid_0vbb"], @@ -121,7 +121,7 @@ def test_friend(more_lgnd_files): def test_iterate(more_lgnd_files): # iterate through all hit groups in all files; there are 10 entries in # each group/file - lh5_it = LH5Iterator( + lh5_it = lh5.LH5Iterator( more_lgnd_files[1] * 3, ["ch1084803/hit"] * 2 + ["ch1084804/hit"] * 2 + ["ch1121600/hit"] * 2, field_mask=["is_valid_0vbb", "timestamp", 
"zacEmax_ctc_cal"], diff --git a/tests/test_lh5_store.py b/tests/lh5/test_lh5_store.py similarity index 71% rename from tests/test_lh5_store.py rename to tests/lh5/test_lh5_store.py index 25491660..9d2d254c 100644 --- a/tests/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -7,10 +7,11 @@ import pytest import lgdo -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 +import lgdo.types as types from lgdo import compression from lgdo.compression import RadwareSigcompress -from lgdo.lh5_store import DEFAULT_HDF5_SETTINGS, LH5Store +from lgdo.lh5.store import DEFAULT_HDF5_SETTINGS @pytest.fixture(scope="module") @@ -19,11 +20,11 @@ def lgnd_file(lgnd_test_data): def test_init(): - LH5Store() + lh5.LH5Store() def test_gimme_file(lgnd_file): - store = LH5Store(keep_open=True) + store = lh5.LH5Store(keep_open=True) f = store.gimme_file(lgnd_file) assert isinstance(f, h5py.File) @@ -35,7 +36,7 @@ def test_gimme_file(lgnd_file): def test_gimme_group(lgnd_file, tmptestdir): f = h5py.File(lgnd_file) - store = LH5Store() + store = lh5.LH5Store() g = store.gimme_group("/geds", f) assert isinstance(g, h5py.Group) @@ -44,12 +45,6 @@ def test_gimme_group(lgnd_file, tmptestdir): assert isinstance(g, h5py.Group) -def test_show(lgnd_file): - lh5.show(lgnd_file) - lh5.show(lgnd_file, "/geds/raw") - lh5.show(lgnd_file, "geds/raw") - - def test_ls(lgnd_file): assert lh5.ls(lgnd_file) == ["geds"] assert lh5.ls(lgnd_file, "/*/raw") == ["geds/raw"] @@ -68,6 +63,12 @@ def test_ls(lgnd_file): ] +def test_show(lgnd_file): + lh5.show(lgnd_file) + lh5.show(lgnd_file, "/geds/raw") + lh5.show(lgnd_file, "geds/raw") + + def test_load_nda(lgnd_file): nda = lh5.load_nda( [lgnd_file, lgnd_file], @@ -83,49 +84,38 @@ def test_load_nda(lgnd_file): assert nda["waveform/values"].shape == (6, 5592) -def test_load_dfs(lgnd_file): - dfs = lh5.load_dfs( - [lgnd_file, lgnd_file], - ["baseline", "waveform/t0"], - lh5_group="/geds/raw", - idx_list=[[1, 3, 5], [2, 6, 7]], - ) - - assert isinstance(dfs, pd.DataFrame) - - @pytest.fixture(scope="module") def lh5_file(tmptestdir): - store = LH5Store() + store = lh5.LH5Store() struct = lgdo.Struct() struct.add_field("scalar", lgdo.Scalar(value=10, attrs={"sth": 1})) - struct.add_field("array", lgdo.Array(nda=np.array([1, 2, 3, 4, 5]))) + struct.add_field("array", types.Array(nda=np.array([1, 2, 3, 4, 5]))) struct.add_field( "aoesa", - lgdo.ArrayOfEqualSizedArrays(shape=(5, 5), dtype=np.float32, fill_val=42), + types.ArrayOfEqualSizedArrays(shape=(5, 5), dtype=np.float32, fill_val=42), ) struct.add_field( "vov", - lgdo.VectorOfVectors( - flattened_data=lgdo.Array( + types.VectorOfVectors( + flattened_data=types.Array( nda=np.array([1, 2, 3, 4, 5, 2, 4, 8, 9, 7, 5, 3, 1]) ), - cumulative_length=lgdo.Array(nda=np.array([2, 5, 6, 10, 13])), + cumulative_length=types.Array(nda=np.array([2, 5, 6, 10, 13])), attrs={"myattr": 2}, ), ) struct.add_field( "voev", - lgdo.VectorOfEncodedVectors( - encoded_data=lgdo.VectorOfVectors( - flattened_data=lgdo.Array( + types.VectorOfEncodedVectors( + encoded_data=types.VectorOfVectors( + flattened_data=types.Array( nda=np.array([1, 2, 3, 4, 5, 2, 4, 8, 9, 7, 5, 3, 1]) ), - cumulative_length=lgdo.Array(nda=np.array([2, 5, 6, 10, 13])), + cumulative_length=types.Array(nda=np.array([2, 5, 6, 10, 13])), ), - decoded_size=lgdo.Array(shape=5, fill_val=6), + decoded_size=types.Array(shape=5, fill_val=6), ), ) @@ -142,14 +132,14 @@ def lh5_file(tmptestdir): ), } - struct.add_field("table", lgdo.Table(col_dict=col_dict, attrs={"stuff": 5})) + 
struct.add_field("table", types.Table(col_dict=col_dict, attrs={"stuff": 5})) struct.add_field( "wftable", - lgdo.WaveformTable( - t0=lgdo.Array(np.zeros(10)), - dt=lgdo.Array(np.full(10, fill_value=1)), - values=lgdo.ArrayOfEqualSizedArrays( + types.WaveformTable( + t0=types.Array(np.zeros(10)), + dt=types.Array(np.full(10, fill_value=1)), + values=types.ArrayOfEqualSizedArrays( shape=(10, 1000), dtype=np.uint16, fill_val=100, attrs={"custom": 8} ), ), @@ -157,16 +147,16 @@ def lh5_file(tmptestdir): struct.add_field( "wftable_enc", - lgdo.WaveformTable( - t0=lgdo.Array(np.zeros(10)), - dt=lgdo.Array(np.full(10, fill_value=1)), + types.WaveformTable( + t0=types.Array(np.zeros(10)), + dt=types.Array(np.full(10, fill_value=1)), values=compression.encode( struct["wftable"].values, codec=RadwareSigcompress(codec_shift=-32768) ), ), ) - store.write_object( + store.write( struct, "struct", f"{tmptestdir}/tmp-pygama-lgdo-types.lh5", @@ -176,7 +166,7 @@ def lh5_file(tmptestdir): wo_mode="overwrite_file", ) - store.write_object( + store.write( struct, "struct_full", f"{tmptestdir}/tmp-pygama-lgdo-types.lh5", @@ -194,7 +184,7 @@ def test_write_objects(lh5_file): def test_read_n_rows(lh5_file): - store = LH5Store() + store = lh5.LH5Store() assert store.read_n_rows("/data/struct_full/aoesa", lh5_file) == 5 assert store.read_n_rows("/data/struct_full/array", lh5_file) == 5 assert store.read_n_rows("/data/struct_full/scalar", lh5_file) is None @@ -206,14 +196,14 @@ def test_read_n_rows(lh5_file): def test_get_buffer(lh5_file): - store = LH5Store() + store = lh5.LH5Store() buf = store.get_buffer("/data/struct_full/wftable_enc", lh5_file) - assert isinstance(buf.values, lgdo.ArrayOfEqualSizedArrays) + assert isinstance(buf.values, types.ArrayOfEqualSizedArrays) def test_read_scalar(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/scalar", lh5_file) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/scalar", lh5_file) assert isinstance(lh5_obj, lgdo.Scalar) assert lh5_obj.value == 10 assert n_rows == 1 @@ -223,9 +213,9 @@ def test_read_scalar(lh5_file): def test_read_array(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/array", lh5_file) - assert isinstance(lh5_obj, lgdo.Array) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/array", lh5_file) + assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda == np.array([2, 3, 4])).all() assert n_rows == 3 with h5py.File(lh5_file) as h5f: @@ -236,19 +226,17 @@ def test_read_array(lh5_file): def test_read_array_fancy_idx(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object( - "/data/struct_full/array", lh5_file, idx=[0, 3, 4] - ) - assert isinstance(lh5_obj, lgdo.Array) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct_full/array", lh5_file, idx=[0, 3, 4]) + assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda == np.array([1, 4, 5])).all() assert n_rows == 3 def test_read_vov(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/vov", lh5_file) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/vov", lh5_file) + assert isinstance(lh5_obj, types.VectorOfVectors) desired = [np.array([3, 4, 5]), np.array([2]), np.array([4, 8, 9, 7])] @@ -270,9 +258,9 @@ def test_read_vov(lh5_file): def test_read_vov_fancy_idx(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct_full/vov", lh5_file, 
idx=[0, 2]) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct_full/vov", lh5_file, idx=[0, 2]) + assert isinstance(lh5_obj, types.VectorOfVectors) desired = [np.array([1, 2]), np.array([2])] @@ -283,9 +271,9 @@ def test_read_vov_fancy_idx(lh5_file): def test_read_voev(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/voev", lh5_file, decompress=False) - assert isinstance(lh5_obj, lgdo.VectorOfEncodedVectors) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/voev", lh5_file, decompress=False) + assert isinstance(lh5_obj, types.VectorOfEncodedVectors) desired = [np.array([3, 4, 5]), np.array([2]), np.array([4, 8, 9, 7])] @@ -294,10 +282,10 @@ def test_read_voev(lh5_file): assert n_rows == 3 - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct/voev", [lh5_file, lh5_file], decompress=False ) - assert isinstance(lh5_obj, lgdo.VectorOfEncodedVectors) + assert isinstance(lh5_obj, types.VectorOfEncodedVectors) assert n_rows == 6 with h5py.File(lh5_file) as h5f: @@ -313,11 +301,11 @@ def test_read_voev(lh5_file): def test_read_voev_fancy_idx(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object( + store = lh5.LH5Store() + lh5_obj, n_rows = store.read( "/data/struct_full/voev", lh5_file, idx=[0, 2], decompress=False ) - assert isinstance(lh5_obj, lgdo.VectorOfEncodedVectors) + assert isinstance(lh5_obj, types.VectorOfEncodedVectors) desired = [np.array([1, 2]), np.array([2])] @@ -328,27 +316,27 @@ def test_read_voev_fancy_idx(lh5_file): def test_read_aoesa(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/aoesa", lh5_file) - assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/aoesa", lh5_file) + assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) assert (lh5_obj.nda == np.full((3, 5), fill_value=42)).all() def test_read_table(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/table", lh5_file) - assert isinstance(lh5_obj, lgdo.Table) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) + assert isinstance(lh5_obj, types.Table) assert n_rows == 3 - lh5_obj, n_rows = store.read_object("/data/struct/table", [lh5_file, lh5_file]) + lh5_obj, n_rows = store.read("/data/struct/table", [lh5_file, lh5_file]) assert n_rows == 6 assert lh5_obj.attrs["stuff"] == 5 assert lh5_obj["a"].attrs["attr"] == 9 def test_read_hdf5_compressed_data(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/table", lh5_file) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) assert "compression" not in lh5_obj["b"].attrs with h5py.File(lh5_file) as h5f: @@ -363,12 +351,12 @@ def test_read_hdf5_compressed_data(lh5_file): def test_read_wftable(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/wftable", lh5_file) - assert isinstance(lh5_obj, lgdo.WaveformTable) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/wftable", lh5_file) + assert isinstance(lh5_obj, types.WaveformTable) assert n_rows == 3 - lh5_obj, n_rows = store.read_object("/data/struct/wftable", [lh5_file, lh5_file]) + lh5_obj, n_rows = store.read("/data/struct/wftable", [lh5_file, lh5_file]) assert n_rows == 6 assert lh5_obj.values.attrs["custom"] == 8 @@ -388,32 +376,30 @@ def 
test_read_wftable(lh5_file): def test_read_wftable_encoded(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object( - "/data/struct/wftable_enc", lh5_file, decompress=False - ) - assert isinstance(lh5_obj, lgdo.WaveformTable) - assert isinstance(lh5_obj.values, lgdo.ArrayOfEncodedEqualSizedArrays) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/data/struct/wftable_enc", lh5_file, decompress=False) + assert isinstance(lh5_obj, types.WaveformTable) + assert isinstance(lh5_obj.values, types.ArrayOfEncodedEqualSizedArrays) assert n_rows == 3 assert lh5_obj.values.attrs["codec"] == "radware_sigcompress" assert "codec_shift" in lh5_obj.values.attrs - lh5_obj, n_rows = store.read_object("/data/struct/wftable_enc/values", lh5_file) + lh5_obj, n_rows = store.read("/data/struct/wftable_enc/values", lh5_file) assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) assert n_rows == 3 - lh5_obj, n_rows = store.read_object("/data/struct/wftable_enc", lh5_file) + lh5_obj, n_rows = store.read("/data/struct/wftable_enc", lh5_file) assert isinstance(lh5_obj, lgdo.WaveformTable) assert isinstance(lh5_obj.values, lgdo.ArrayOfEqualSizedArrays) assert n_rows == 3 - lh5_obj_chain, n_rows = store.read_object( + lh5_obj_chain, n_rows = store.read( "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=False ) assert n_rows == 6 assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEncodedEqualSizedArrays) - lh5_obj_chain, n_rows = store.read_object( + lh5_obj_chain, n_rows = store.read( "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=True ) assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEqualSizedArrays) @@ -440,24 +426,22 @@ def test_read_wftable_encoded(lh5_file): def test_read_with_field_mask(lh5_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object( - "/data/struct_full", lh5_file, field_mask=["array"] - ) + lh5_obj, n_rows = store.read("/data/struct_full", lh5_file, field_mask=["array"]) assert list(lh5_obj.keys()) == ["array"] - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct_full", lh5_file, field_mask=("array", "table") ) assert list(lh5_obj.keys()) == ["array", "table"] - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct_full", lh5_file, field_mask={"array": True} ) assert list(lh5_obj.keys()) == ["array"] - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct_full", lh5_file, field_mask={"vov": False, "voev": False} ) assert list(lh5_obj.keys()) == [ @@ -471,45 +455,45 @@ def test_read_with_field_mask(lh5_file): def test_read_lgnd_array(lgnd_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object("/geds/raw/baseline", lgnd_file) - assert isinstance(lh5_obj, lgdo.Array) + lh5_obj, n_rows = store.read("/geds/raw/baseline", lgnd_file) + assert isinstance(lh5_obj, types.Array) assert n_rows == 100 assert len(lh5_obj) == 100 - lh5_obj, n_rows = store.read_object("/geds/raw/waveform/values", lgnd_file) - assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) + lh5_obj, n_rows = store.read("/geds/raw/waveform/values", lgnd_file) + assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) def test_read_lgnd_array_fancy_idx(lgnd_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/baseline", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] ) - assert isinstance(lh5_obj, lgdo.Array) + assert isinstance(lh5_obj, types.Array) 
assert n_rows == 7 assert len(lh5_obj) == 7 assert (lh5_obj.nda == [13508, 14353, 14525, 14341, 15079, 11675, 13995]).all() def test_read_lgnd_vov(lgnd_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object("/geds/raw/tracelist", lgnd_file) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + lh5_obj, n_rows = store.read("/geds/raw/tracelist", lgnd_file) + assert isinstance(lh5_obj, types.VectorOfVectors) assert n_rows == 100 assert len(lh5_obj) == 100 def test_read_lgnd_vov_fancy_idx(lgnd_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/tracelist", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] ) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + assert isinstance(lh5_obj, types.VectorOfVectors) assert n_rows == 7 assert len(lh5_obj) == 7 assert (lh5_obj.cumulative_length.nda == [1, 2, 3, 4, 5, 6, 7]).all() @@ -517,20 +501,20 @@ def test_read_lgnd_vov_fancy_idx(lgnd_file): def test_read_array_concatenation(lgnd_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/geds/raw/baseline", [lgnd_file, lgnd_file]) - assert isinstance(lh5_obj, lgdo.Array) + store = lh5.LH5Store() + lh5_obj, n_rows = store.read("/geds/raw/baseline", [lgnd_file, lgnd_file]) + assert isinstance(lh5_obj, types.Array) assert n_rows == 200 assert len(lh5_obj) == 200 def test_read_lgnd_waveform_table(lgnd_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object("/geds/raw/waveform", lgnd_file) - assert isinstance(lh5_obj, lgdo.WaveformTable) + lh5_obj, n_rows = store.read("/geds/raw/waveform", lgnd_file) + assert isinstance(lh5_obj, types.WaveformTable) - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/waveform", lgnd_file, start_row=10, @@ -538,29 +522,29 @@ def test_read_lgnd_waveform_table(lgnd_file): field_mask=["t0", "dt"], ) - assert isinstance(lh5_obj, lgdo.Table) + assert isinstance(lh5_obj, types.Table) assert list(lh5_obj.keys()) == ["t0", "dt"] assert len(lh5_obj) == 10 def test_read_lgnd_waveform_table_fancy_idx(lgnd_file): - store = LH5Store() + store = lh5.LH5Store() - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/waveform", lgnd_file, idx=[7, 9, 25, 27, 33, 38, 46, 52, 57, 59, 67, 71, 72, 82, 90, 92, 93, 94, 97], ) - assert isinstance(lh5_obj, lgdo.WaveformTable) + assert isinstance(lh5_obj, types.WaveformTable) assert len(lh5_obj) == 19 @pytest.fixture(scope="module") def enc_lgnd_file(lgnd_file, tmptestdir): - store = LH5Store() - wft, n_rows = store.read_object("/geds/raw/waveform", lgnd_file) + store = lh5.LH5Store() + wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) wft.values.attrs["compression"] = RadwareSigcompress(codec_shift=-32768) - store.write_object( + store.write( wft, "/geds/raw/waveform", f"{tmptestdir}/tmp-pygama-compressed-wfs.lh5", @@ -574,16 +558,16 @@ def test_write_compressed_lgnd_waveform_table(enc_lgnd_file): def test_read_compressed_lgnd_waveform_table(lgnd_file, enc_lgnd_file): - store = LH5Store() - wft, _ = store.read_object("/geds/raw/waveform", enc_lgnd_file) - assert isinstance(wft.values, lgdo.ArrayOfEqualSizedArrays) + store = lh5.LH5Store() + wft, _ = store.read("/geds/raw/waveform", enc_lgnd_file) + assert isinstance(wft.values, types.ArrayOfEqualSizedArrays) assert "compression" not in wft.values.attrs def test_write_with_hdf5_compression(lgnd_file, tmptestdir): - store = LH5Store() - wft, n_rows = 
store.read_object("/geds/raw/waveform", lgnd_file) - store.write_object( + store = lh5.LH5Store() + wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) + store.write( wft, "/geds/raw/waveform", f"{tmptestdir}/tmp-pygama-hdf5-compressed-wfs.lh5", @@ -597,7 +581,7 @@ def test_write_with_hdf5_compression(lgnd_file, tmptestdir): assert h5f["/geds/raw/waveform/values"].compression_opts == 9 assert h5f["/geds/raw/waveform/values"].shuffle is True - store.write_object( + store.write( wft, "/geds/raw/waveform", f"{tmptestdir}/tmp-pygama-hdf5-compressed-wfs.lh5", @@ -618,13 +602,13 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"): os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset1": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset1": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") - store.write_object( + store = lh5.LH5Store() + store.write(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( tb2, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -637,9 +621,7 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir): ] # Now, check that the data were overwritten - tb_dat, _ = store.read_object( - "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) + tb_dat, _ = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") assert np.array_equal(tb_dat["dset1"].nda, np.ones(10)) @@ -651,13 +633,13 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"): os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") - store.write_object( + store = lh5.LH5Store() + store.write(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( tb2, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -665,9 +647,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): ) # Now, try to overwrite with a different field # Now, check that the data were overwritten - tb_dat, _ = store.read_object( - "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) + tb_dat, _ = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") assert np.array_equal(tb_dat["dset2"].nda, np.ones(10)) # Also make sure that the first table's fields aren't lurking around the lh5 file! 
@@ -678,18 +658,18 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"): os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object( + store = lh5.LH5Store() + store.write( tb1, "my_table", f"{tmptestdir}/write_object_overwrite_test.lh5", group="my_group", ) - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -698,7 +678,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): ) # Now, try to overwrite with a different field # Now, check that the data were overwritten - tb_dat, _ = store.read_object( + tb_dat, _ = store.read( "my_group/my_table", f"{tmptestdir}/write_object_overwrite_test.lh5" ) assert np.array_equal(tb_dat["dset2"].nda, np.ones(10)) @@ -713,11 +693,11 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): caplog.set_level(logging.DEBUG) caplog.clear() - # Start with an lgdo.WaveformTable + # Start with an types.WaveformTable if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"): os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5") - tb1 = lh5.WaveformTable( + tb1 = types.WaveformTable( t0=np.zeros(10), t0_units="ns", dt=np.zeros(10), @@ -725,7 +705,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): values=np.zeros((10, 10)), values_units="ADC", ) - tb2 = lh5.WaveformTable( + tb2 = types.WaveformTable( t0=np.ones(10), t0_units="ns", dt=np.ones(10), @@ -733,14 +713,14 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): values=np.ones((10, 10)), values_units="ADC", ) # Same field name, different values - store = LH5Store() - store.write_object( + store = lh5.LH5Store() + store.write( tb1, "my_table", f"{tmptestdir}/write_object_overwrite_test.lh5", group="my_group", ) - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -754,19 +734,17 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ] # Now, check that the data were overwritten - tb_dat, _ = store.read_object( + tb_dat, _ = store.read( "my_group/my_table", f"{tmptestdir}/write_object_overwrite_test.lh5" ) assert np.array_equal(tb_dat["values"].nda, np.ones((10, 10))) # Now try overwriting an array, and test the write_start argument - array1 = lh5.Array(nda=np.zeros(10)) - array2 = lh5.Array(nda=np.ones(20)) - store = LH5Store() - store.write_object( - array1, "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) - store.write_object( + array1 = types.Array(nda=np.zeros(10)) + array2 = types.Array(nda=np.ones(20)) + store = lh5.LH5Store() + store.write(array1, "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( array2, "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -775,7 +753,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ) # Now, check that the data were overwritten - array_dat, _ = store.read_object( + array_dat, _ = store.read( "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5" ) expected_out_array = np.append(np.zeros(5), np.ones(20)) @@ -783,13 +761,11 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): assert 
np.array_equal(array_dat.nda, expected_out_array) # Now try overwriting a scalar - scalar1 = lh5.Scalar(0) - scalar2 = lh5.Scalar(1) - store = LH5Store() - store.write_object( - scalar1, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) - store.write_object( + scalar1 = types.Scalar(0) + scalar2 = types.Scalar(1) + store = lh5.LH5Store() + store.write(scalar1, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( scalar2, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -797,20 +773,18 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ) # Now, check that the data were overwritten - scalar_dat, _ = store.read_object( + scalar_dat, _ = store.read( "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5" ) assert scalar_dat.value == 1 # Finally, try overwriting a vector of vectors - vov1 = lh5.VectorOfVectors(listoflists=[np.zeros(1), np.ones(2), np.zeros(3)]) - vov2 = lh5.VectorOfVectors(listoflists=[np.ones(1), np.zeros(2), np.ones(3)]) - store = LH5Store() - store.write_object( - vov1, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) - store.write_object( + vov1 = types.VectorOfVectors(listoflists=[np.zeros(1), np.ones(2), np.zeros(3)]) + vov2 = types.VectorOfVectors(listoflists=[np.ones(1), np.zeros(2), np.ones(3)]) + store = lh5.LH5Store() + store.write(vov1, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( vov2, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -818,7 +792,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): write_start=1, ) # start overwriting the second list of lists - vector_dat, _ = store.read_object( + vector_dat, _ = store.read( "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5" ) @@ -832,14 +806,12 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - array1 = lh5.Array(np.zeros(10)) - tb1 = lh5.Table(col_dict={"dset1`": lh5.Array(np.ones(10))}) - store = LH5Store() - store.write_object( - array1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5" - ) + array1 = types.Array(np.zeros(10)) + tb1 = types.Table(col_dict={"dset1`": types.Array(np.ones(10))}) + store = lh5.LH5Store() + store.write(array1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(RuntimeError) as exc_info: - store.write_object( + store.write( tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -855,18 +827,19 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - tb1 = lh5.Table( - col_dict={"dset1": lh5.Array(np.zeros(10)), "dset2": lh5.Array(np.zeros(10))} + tb1 = types.Table( + col_dict={ + "dset1": types.Array(np.zeros(10)), + "dset2": types.Array(np.zeros(10)), + } ) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object( - tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5" - ) + store = lh5.LH5Store() + store.write(tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(ValueError) as exc_info: - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ 
-883,16 +856,14 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(20))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(20))} ) # different field name, different size - store = LH5Store() - store.write_object( - tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5" - ) + store = lh5.LH5Store() + store.write(tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(ValueError) as exc_info: - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -909,18 +880,18 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # different field name, different size - store = LH5Store() - store.write_object( + store = lh5.LH5Store() + store.write( tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", group="my_group", ) - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -929,9 +900,20 @@ def test_write_object_append_column(tmptestdir): ) # Now, check that the data were appended - tb_dat, _ = store.read_object( + tb_dat, _ = store.read( "my_group/my_table", f"{tmptestdir}/write_object_append_column_test.lh5" ) - assert isinstance(tb_dat, lgdo.Table) + assert isinstance(tb_dat, types.Table) assert np.array_equal(tb_dat["dset1"].nda, np.zeros(10)) assert np.array_equal(tb_dat["dset2"].nda, np.ones(10)) + + +def test_load_dfs(lgnd_file): + dfs = lh5.load_dfs( + [lgnd_file, lgnd_file], + ["baseline", "waveform/t0"], + lh5_group="/geds/raw", + idx_list=[[1, 3, 5], [2, 6, 7]], + ) + + assert isinstance(dfs, pd.DataFrame) diff --git a/tests/lh5/test_lh5_utils.py b/tests/lh5/test_lh5_utils.py new file mode 100644 index 00000000..c83dd9a9 --- /dev/null +++ b/tests/lh5/test_lh5_utils.py @@ -0,0 +1,72 @@ +import os + +import pytest + +import lgdo.lh5.utils as utils + + +@pytest.fixture(scope="module") +def lgnd_file(lgnd_test_data): + return lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5") + + +def test_parse_datatype(): + datatypes = [ + ("real", ("scalar", None, "real")), + ("array<1>{bool}", ("array", (1,), "bool")), + ("fixedsizearray<2>{real}", ("fixedsizearray", (2,), "real")), + ( + "arrayofequalsizedarrays<3,4>{complex}", + ("arrayofequalsizedarrays", (3, 4), "complex"), + ), + ("array<1>{array<1>{blob}}", ("array", (1,), "array<1>{blob}")), + ( + "struct{field1,field2,fieldn}", + ("struct", None, ["field1", "field2", "fieldn"]), + ), + ("table{col1,col2,coln}", ("table", None, ["col1", "col2", "coln"])), + ] + + for string, dt_tuple in datatypes: + pd_dt_tuple = utils.parse_datatype(string) + assert pd_dt_tuple == dt_tuple + + +def test_expand_vars(): + # Check env variable expansion + os.environ["PYGAMATESTBASEDIR"] = "a_random_string" + assert 
utils.expand_vars("$PYGAMATESTBASEDIR/blah") == "a_random_string/blah" + + # Check user variable expansion + assert ( + utils.expand_vars( + "$PYGAMATESTBASEDIR2/blah", + substitute={"PYGAMATESTBASEDIR2": "a_random_string"}, + ) + == "a_random_string/blah" + ) + + +def test_expand_path(lgnd_test_data): + files = [ + lgnd_test_data.get_path( + "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_dsp.lh5" + ), + lgnd_test_data.get_path( + "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012228Z-tier_dsp.lh5" + ), + ] + base_dir = os.path.dirname(files[0]) + + assert utils.expand_path(f"{base_dir}/*20230318T012144Z*") == files[0] + + # Should fail if file not found + with pytest.raises(FileNotFoundError): + utils.expand_path(f"{base_dir}/not_a_real_file.lh5") + + # Should fail if multiple files found + with pytest.raises(FileNotFoundError): + utils.expand_path(f"{base_dir}/*.lh5") + + # Check if it finds a list of files correctly + assert sorted(utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted(files) diff --git a/tests/test_lgdo_utils.py b/tests/test_lgdo_utils.py index 49df91ca..ce86d971 100644 --- a/tests/test_lgdo_utils.py +++ b/tests/test_lgdo_utils.py @@ -1,9 +1,6 @@ -import os - import numpy as np -import pytest -import lgdo.lgdo_utils as lgdo_utils +import lgdo.utils as utils def test_get_element_type(): @@ -20,69 +17,5 @@ def test_get_element_type(): ] for obj, name in objs: - get_name = lgdo_utils.get_element_type(obj) + get_name = utils.get_element_type(obj) assert get_name == name - - -def test_parse_datatype(): - datatypes = [ - ("real", ("scalar", None, "real")), - ("array<1>{bool}", ("array", (1,), "bool")), - ("fixedsizearray<2>{real}", ("fixedsizearray", (2,), "real")), - ( - "arrayofequalsizedarrays<3,4>{complex}", - ("arrayofequalsizedarrays", (3, 4), "complex"), - ), - ("array<1>{array<1>{blob}}", ("array", (1,), "array<1>{blob}")), - ( - "struct{field1,field2,fieldn}", - ("struct", None, ["field1", "field2", "fieldn"]), - ), - ("table{col1,col2,coln}", ("table", None, ["col1", "col2", "coln"])), - ] - - for string, dt_tuple in datatypes: - pd_dt_tuple = lgdo_utils.parse_datatype(string) - assert pd_dt_tuple == dt_tuple - - -def test_expand_vars(): - # Check env variable expansion - os.environ["PYGAMATESTBASEDIR"] = "a_random_string" - assert lgdo_utils.expand_vars("$PYGAMATESTBASEDIR/blah") == "a_random_string/blah" - - # Check user variable expansion - assert ( - lgdo_utils.expand_vars( - "$PYGAMATESTBASEDIR2/blah", - substitute={"PYGAMATESTBASEDIR2": "a_random_string"}, - ) - == "a_random_string/blah" - ) - - -def test_expand_path(lgnd_test_data): - files = [ - lgnd_test_data.get_path( - "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_dsp.lh5" - ), - lgnd_test_data.get_path( - "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012228Z-tier_dsp.lh5" - ), - ] - base_dir = os.path.dirname(files[0]) - - assert lgdo_utils.expand_path(f"{base_dir}/*20230318T012144Z*") == files[0] - - # Should fail if file not found - with pytest.raises(FileNotFoundError): - lgdo_utils.expand_path(f"{base_dir}/not_a_real_file.lh5") - - # Should fail if multiple files found - with pytest.raises(FileNotFoundError): - lgdo_utils.expand_path(f"{base_dir}/*.lh5") - - # Check if it finds a list of files correctly - assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted( - files - ) diff --git a/tests/types/test_array.py 
b/tests/types/test_array.py index 0932c99b..df1bcd3c 100644 --- a/tests/types/test_array.py +++ b/tests/types/test_array.py @@ -1,6 +1,6 @@ import numpy as np -import lgdo.lgdo_utils as utils +import lgdo.utils as utils from lgdo import Array diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 4126d119..71c20ea8 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -2,7 +2,7 @@ import pytest import lgdo -import lgdo.lgdo_utils as utils +import lgdo.utils as utils from lgdo import VectorOfVectors from lgdo.types import vectorofvectors as vov
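Reviewer note: the new `tests/lh5/test_lh5_utils.py` covers the helpers that moved out of the old `lgdo.lgdo_utils` module into `lgdo.lh5.utils`. A short sketch of the relocated helpers as the tests call them; the `$MY_DATA_DIR` variable and the wildcard path are illustrative placeholders:

```python
import lgdo.lh5.utils as utils

# LH5 datatype strings decode into a (kind, shape, element-type-or-fields) tuple
assert utils.parse_datatype("array<1>{bool}") == ("array", (1,), "bool")
assert utils.parse_datatype("table{col1,col2}") == ("table", None, ["col1", "col2"])

# environment or user-supplied variables can be expanded inside paths
assert (
    utils.expand_vars("$MY_DATA_DIR/file.lh5", substitute={"MY_DATA_DIR": "/data"})
    == "/data/file.lh5"
)

# expand_path() resolves a wildcard to exactly one file and raises
# FileNotFoundError on zero or multiple matches; pass list=True to get
# every match back as a list instead
# files = utils.expand_path("/data/*.lh5", list=True)
```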
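The generic element-type helper stays behind as `lgdo.utils.get_element_type` (hence the simplified imports in `tests/types/`), and `test_load_dfs` exercises the convenience reader that stacks columns from several files into a pandas DataFrame. A hedged sketch, with `raw.lh5` standing in for a real raw-tier file and the column names taken from that test:

```python
import numpy as np

import lgdo.lh5 as lh5
import lgdo.utils as utils  # formerly lgdo.lgdo_utils

# map a numpy scalar or array to its LH5 element-type name
# (a floating-point value is expected to map to "real")
print(utils.get_element_type(np.float64(1.0)))

# read selected columns from one or more LH5 files into a single DataFrame,
# optionally restricting each file to a list of row indices;
# "raw.lh5" is a placeholder for a file with a /geds/raw table
df = lh5.load_dfs(
    ["raw.lh5", "raw.lh5"],
    ["baseline", "waveform/t0"],
    lh5_group="/geds/raw",
    idx_list=[[1, 3, 5], [2, 6, 7]],
)
print(df.describe())
```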