From 94c944ddad503db36d6b02a2b1fb1555c83206ef Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:15:43 +0100 Subject: [PATCH 01/13] Moved lh5_store.py and renamed --- src/lgdo/{lh5_store.py => lh5/store.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/lgdo/{lh5_store.py => lh5/store.py} (100%) diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5/store.py similarity index 100% rename from src/lgdo/lh5_store.py rename to src/lgdo/lh5/store.py From 0827ffb2bef44468d25b4d981efbfea8c554f7ef Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:17:50 +0100 Subject: [PATCH 02/13] Overwrite content of store.py --- src/lgdo/lh5/store.py | 358 ++++-------------------------------------- 1 file changed, 31 insertions(+), 327 deletions(-) diff --git a/src/lgdo/lh5/store.py b/src/lgdo/lh5/store.py index 7103d05c..c0588aea 100644 --- a/src/lgdo/lh5/store.py +++ b/src/lgdo/lh5/store.py @@ -11,17 +11,16 @@ import sys from bisect import bisect_left from collections import defaultdict -from typing import Any, Iterator, Union +from typing import Any, Union import h5py import numba as nb import numpy as np import pandas as pd -from . import compression as compress -from .compression import WaveformCodec -from .lgdo_utils import expand_path, parse_datatype -from .types import ( +from .. import compression as compress +from ..compression import WaveformCodec +from ..types import ( Array, ArrayOfEncodedEqualSizedArrays, ArrayOfEqualSizedArrays, @@ -33,24 +32,26 @@ VectorOfVectors, WaveformTable, ) +from .utils import expand_path, parse_datatype LGDO = Union[Array, Scalar, Struct, VectorOfVectors] log = logging.getLogger(__name__) DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"} +DEFAULT_HDF5_COMPRESSION = None -class LH5Store: +class Store: """ Class to represent a store of LEGEND HDF5 files. The two main methods - implemented by the class are :meth:`read_object` and :meth:`write_object`. + implemented by the class are :meth:`read` and :meth:`write`. Examples -------- - >>> from lgdo import LH5Store - >>> store = LH5Store() - >>> obj, _ = store.read_object("/geds/waveform", "file.lh5") + >>> from lgdo import Store + >>> store = Store() + >>> obj, _ = store.read("/geds/waveform", "file.lh5") >>> type(obj) lgdo.waveform_table.WaveformTable """ @@ -157,12 +158,12 @@ def get_buffer( """Returns an LH5 object appropriate for use as a pre-allocated buffer in a read loop. Sets size to `size` if object has a size. 
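
        A minimal usage sketch (paths and sizes are illustrative):

        >>> store = Store()
        >>> buf = store.get_buffer("/geds/waveform", "file.lh5", size=100)
        >>> buf, n = store.read("/geds/waveform", "file.lh5", obj_buf=buf)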
""" - obj, n_rows = self.read_object(name, lh5_file, n_rows=0, field_mask=field_mask) + obj, n_rows = self.read(name, lh5_file, n_rows=0, field_mask=field_mask) if hasattr(obj, "resize") and size is not None: obj.resize(new_size=size) return obj - def read_object( + def read( self, name: str, lh5_file: str | h5py.File | list[str | h5py.File], @@ -289,7 +290,7 @@ def read_object( if i == (len(lh5_file) - 1): self.in_file_loop = False - obj_buf, n_rows_read_i = self.read_object( + obj_buf, n_rows_read_i = self.read( name, lh5_file[i], start_row=start_row, @@ -391,7 +392,7 @@ def read_object( f = int(field) else: f = str(field) - obj_dict[f], _ = self.read_object( + obj_dict[f], _ = self.read( name + "/" + field, h5f, start_row=start_row, @@ -438,7 +439,7 @@ def read_object( else: fld_buf = obj_buf[field] - col_dict[field], n_rows_read = self.read_object( + col_dict[field], n_rows_read = self.read( name + "/" + field, h5f, start_row=start_row, @@ -532,7 +533,7 @@ def read_object( decoded_size_buf = obj_buf.decoded_size encoded_data_buf = obj_buf.encoded_data - decoded_size, _ = self.read_object( + decoded_size, _ = self.read( f"{name}/decoded_size", h5f, start_row=start_row, @@ -544,7 +545,7 @@ def read_object( ) # read out encoded_data, a VectorOfVectors - encoded_data, n_rows_read = self.read_object( + encoded_data, n_rows_read = self.read( f"{name}/encoded_data", h5f, start_row=start_row, @@ -610,7 +611,7 @@ def read_object( # read out cumulative_length cumulen_buf = None if obj_buf is None else obj_buf.cumulative_length - cumulative_length, n_rows_read = self.read_object( + cumulative_length, n_rows_read = self.read( f"{name}/cumulative_length", h5f, start_row=start_row, @@ -635,7 +636,7 @@ def read_object( if idx2[0][0] == -1: idx2 = (idx2[0][1:],) fd_start = 0 # this variable avoids an ndarray append - fd_starts, fds_n_rows_read = self.read_object( + fd_starts, fds_n_rows_read = self.read( f"{name}/cumulative_length", h5f, start_row=start_row, @@ -718,7 +719,7 @@ def read_object( fd_buf.resize(fdb_size) # now read - flattened_data, dummy_rows_read = self.read_object( + flattened_data, dummy_rows_read = self.read( f"{name}/flattened_data", h5f, start_row=fd_start, @@ -850,7 +851,7 @@ def read_object( raise RuntimeError("don't know how to read datatype {datatype}") - def write_object( + def write( self, obj: LGDO, name: str, @@ -1035,7 +1036,7 @@ def write_object( # Convert keys to string for dataset names f = str(field) - self.write_object( + self.write( obj_fld, f, lh5_file, @@ -1071,7 +1072,7 @@ def write_object( # ask not to further compress flattened_data, it is already compressed! obj.encoded_data.flattened_data.attrs["compression"] = None - self.write_object( + self.write( obj.encoded_data, "encoded_data", lh5_file, @@ -1083,7 +1084,7 @@ def write_object( **h5py_kwargs, ) - self.write_object( + self.write( obj.decoded_size, "decoded_size", lh5_file, @@ -1119,7 +1120,7 @@ def write_object( # First write flattened_data array. Only write rows with data. 
fd_start = 0 if start_row == 0 else obj.cumulative_length.nda[start_row - 1] fd_n_rows = obj.cumulative_length.nda[start_row + n_rows - 1] - fd_start - self.write_object( + self.write( obj.flattened_data, "flattened_data", lh5_file, @@ -1142,7 +1143,7 @@ def write_object( cl_dtype = obj.cumulative_length.nda.dtype.type obj.cumulative_length.nda += cl_dtype(offset) - self.write_object( + self.write( obj.cumulative_length, "cumulative_length", lh5_file, @@ -1308,7 +1309,7 @@ def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: + ("" if lh5_group == "" else f" (and group {lh5_group})") ) - lh5_st = LH5Store() + lh5_st = Store() # To use recursively, make lh5_file a h5group instead of a string if isinstance(lh5_file, str): lh5_file = lh5_st.gimme_file(lh5_file, "r") @@ -1472,7 +1473,7 @@ def load_nda( # Expand wildcards f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))] - sto = LH5Store() + sto = Store() par_data = {par: [] for par in par_list} for ii, f in enumerate(f_list): f = sto.gimme_file(f, "r") @@ -1481,9 +1482,9 @@ def load_nda( raise RuntimeError(f"'{lh5_group}/{par}' not in file {f_list[ii]}") if idx_list is None: - data, _ = sto.read_object(f"{lh5_group}/{par}", f) + data, _ = sto.read(f"{lh5_group}/{par}", f) else: - data, _ = sto.read_object(f"{lh5_group}/{par}", f, idx=idx_list[ii]) + data, _ = sto.read(f"{lh5_group}/{par}", f, idx=idx_list[ii]) if not data: continue par_data[par].append(data.nda) @@ -1519,303 +1520,6 @@ def load_dfs( ) -class LH5Iterator(Iterator): - """ - A class for iterating through one or more LH5 files, one block of entries - at a time. This also accepts an entry list/mask to enable event selection, - and a field mask. - - This class can be used either for random access: - - >>> lh5_obj, n_rows = lh5_it.read(entry) - - to read the block of entries starting at entry. In case of multiple files - or the use of an event selection, entry refers to a global event index - across files and does not count events that are excluded by the selection. - - This can also be used as an iterator: - - >>> for lh5_obj, entry, n_rows in LH5Iterator(...): - >>> # do the thing! - - This is intended for if you are reading a large quantity of data but - want to limit your memory usage (particularly when reading in waveforms!). - The ``lh5_obj`` that is read by this class is reused in order to avoid - reallocation of memory; this means that if you want to hold on to data - between reads, you will have to copy it somewhere! - """ - - def __init__( - self, - lh5_files: str | list[str], - groups: str | list[str], - base_path: str = "", - entry_list: list[int] | list[list[int]] = None, - entry_mask: list[bool] | list[list[bool]] = None, - field_mask: dict[str, bool] | list[str] | tuple[str] = None, - buffer_len: int = 3200, - friend: LH5Iterator = None, - ) -> None: - """ - Parameters - ---------- - lh5_files - file or files to read from. May include wildcards and environment - variables. - groups - HDF5 group(s) to read. If a list is provided for both lh5_files - and group, they must be the same size. If a file is wild-carded, - the same group will be assigned to each file found - entry_list - list of entry numbers to read. If a nested list is provided, - expect one top-level list for each file, containing a list of - local entries. If a list of ints is provided, use global entries. - entry_mask - mask of entries to read. If a list of arrays is provided, expect - one for each file. Ignore if a selection list is provided. 
- field_mask - mask of which fields to read. See :meth:`LH5Store.read_object` for - more details. - buffer_len - number of entries to read at a time while iterating through files. - friend - a ''friend'' LH5Iterator that will be read in parallel with this. - The friend should have the same length and entry list. A single - LH5 table containing columns from both iterators will be returned. - """ - self.lh5_st = LH5Store(base_path=base_path, keep_open=True) - - # List of files, with wildcards and env vars expanded - if isinstance(lh5_files, str): - lh5_files = [lh5_files] - if isinstance(groups, list): - lh5_files *= len(groups) - elif not isinstance(lh5_files, list): - raise ValueError("lh5_files must be a string or list of strings") - - if isinstance(groups, str): - groups = [groups] * len(lh5_files) - elif not isinstance(groups, list): - raise ValueError("group must be a string or list of strings") - - if not len(groups) == len(lh5_files): - raise ValueError("lh5_files and groups must have same length") - - self.lh5_files = [] - self.groups = [] - for f, g in zip(lh5_files, groups): - f_exp = expand_path(f, list=True, base_path=base_path) - self.lh5_files += f_exp - self.groups += [g] * len(f_exp) - - if entry_list is not None and entry_mask is not None: - raise ValueError( - "entry_list and entry_mask arguments are mutually exclusive" - ) - - # Map to last row in each file - self.file_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") - # Map to last iterator entry for each file - self.entry_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") - self.buffer_len = buffer_len - - if len(self.lh5_files) > 0: - f = self.lh5_files[0] - g = self.groups[0] - self.lh5_buffer = self.lh5_st.get_buffer( - g, - f, - size=self.buffer_len, - field_mask=field_mask, - ) - self.file_map[0] = self.lh5_st.read_n_rows(g, f) - else: - raise RuntimeError(f"can't open any files from {lh5_files}") - - self.n_rows = 0 - self.current_entry = 0 - self.next_entry = 0 - - self.field_mask = field_mask - - # List of entry indices from each file - self.local_entry_list = None - self.global_entry_list = None - if entry_list is not None: - entry_list = list(entry_list) - if isinstance(entry_list[0], int): - self.local_entry_list = [None] * len(self.file_map) - self.global_entry_list = np.array(entry_list, "i") - self.global_entry_list.sort() - - else: - self.local_entry_list = [[]] * len(self.file_map) - for i_file, local_list in enumerate(entry_list): - self.local_entry_list[i_file] = np.array(local_list, "i") - self.local_entry_list[i_file].sort() - - elif entry_mask is not None: - # Convert entry mask into an entry list - if isinstance(entry_mask, pd.Series): - entry_mask = entry_mask.values - if isinstance(entry_mask, np.ndarray): - self.local_entry_list = [None] * len(self.file_map) - self.global_entry_list = np.nonzero(entry_mask)[0] - else: - self.local_entry_list = [[]] * len(self.file_map) - for i_file, local_mask in enumerate(entry_mask): - self.local_entry_list[i_file] = np.nonzero(local_mask)[0] - - # Attach the friend - if friend is not None: - if not isinstance(friend, LH5Iterator): - raise ValueError("Friend must be an LH5Iterator") - self.lh5_buffer.join(friend.lh5_buffer) - self.friend = friend - - def _get_file_cumlen(self, i_file: int) -> int: - """Helper to get cumulative file length of file""" - if i_file < 0: - return 0 - fcl = self.file_map[i_file] - if fcl == np.iinfo("i").max: - fcl = self._get_file_cumlen(i_file - 1) + self.lh5_st.read_n_rows( - self.groups[i_file], 
self.lh5_files[i_file] - ) - self.file_map[i_file] = fcl - return fcl - - def _get_file_cumentries(self, i_file: int) -> int: - """Helper to get cumulative iterator entries in file""" - if i_file < 0: - return 0 - n = self.entry_map[i_file] - if n == np.iinfo("i").max: - elist = self.get_file_entrylist(i_file) - fcl = self._get_file_cumlen(i_file) - if elist is None: - # no entry list provided - n = fcl - else: - file_entries = self.get_file_entrylist(i_file) - n = len(file_entries) - # check that file entries fall inside of file - if n > 0 and file_entries[-1] >= fcl: - logging.warning(f"Found entries out of range for file {i_file}") - n = np.searchsorted(file_entries, fcl, "right") - n += self._get_file_cumentries(i_file - 1) - self.entry_map[i_file] = n - return n - - def get_file_entrylist(self, i_file: int) -> np.ndarray: - """Helper to get entry list for file""" - # If no entry list is provided - if self.local_entry_list is None: - return None - - elist = self.local_entry_list[i_file] - if elist is None: - # Get local entrylist for this file from global entry list - f_start = self._get_file_cumlen(i_file - 1) - f_end = self._get_file_cumlen(i_file) - i_start = self._get_file_cumentries(i_file - 1) - i_stop = np.searchsorted(self.global_entry_list, f_end, "right") - elist = np.array(self.global_entry_list[i_start:i_stop], "i") - f_start - self.local_entry_list[i_file] = elist - return elist - - def get_global_entrylist(self) -> np.ndarray: - """Get global entry list, constructing it if needed""" - if self.global_entry_list is None and self.local_entry_list is not None: - self.global_entry_list = np.zeros(len(self), "i") - for i_file in range(len(self.lh5_files)): - i_start = self.get_file_cumentries(i_file - 1) - i_stop = self.get_file_cumentries(i_file) - f_start = self.get_file_cumlen(i_file - 1) - self.global_entry_list[i_start:i_stop] = ( - self.get_file_entrylist(i_file) + f_start - ) - return self.global_entry_list - - def read(self, entry: int) -> tuple[LGDO, int]: - """Read the nextlocal chunk of events, starting at entry. 
Return the - LH5 buffer and number of rows read.""" - self.n_rows = 0 - i_file = np.searchsorted(self.entry_map, entry, "right") - - # if file hasn't been opened yet, search through files - # sequentially until we find the right one - if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("i").max: - while i_file < len(self.lh5_files) and entry >= self._get_file_cumentries( - i_file - ): - i_file += 1 - - if i_file == len(self.lh5_files): - return (self.lh5_buffer, self.n_rows) - local_entry = entry - self._get_file_cumentries(i_file - 1) - - while self.n_rows < self.buffer_len and i_file < len(self.file_map): - # Loop through files - local_idx = self.get_file_entrylist(i_file) - if local_idx is not None and len(local_idx) == 0: - i_file += 1 - local_entry = 0 - continue - - i_local = local_idx[local_entry] if local_idx is not None else local_entry - self.lh5_buffer, n_rows = self.lh5_st.read_object( - self.groups[i_file], - self.lh5_files[i_file], - start_row=i_local, - n_rows=self.buffer_len - self.n_rows, - idx=local_idx, - field_mask=self.field_mask, - obj_buf=self.lh5_buffer, - obj_buf_start=self.n_rows, - ) - - self.n_rows += n_rows - i_file += 1 - local_entry = 0 - - self.current_entry = entry - - if self.friend is not None: - self.friend.read(entry) - - return (self.lh5_buffer, self.n_rows) - - def reset_field_mask(self, mask): - """Replaces the field mask of this iterator and any friends with mask""" - self.field_mask = mask - if self.friend is not None: - self.friend.reset_field_mask(mask) - - def __len__(self) -> int: - """Return the total number of entries.""" - return ( - self._get_file_cumentries(len(self.lh5_files) - 1) - if len(self.entry_map) > 0 - else 0 - ) - - def __iter__(self) -> Iterator: - """Loop through entries in blocks of size buffer_len.""" - self.current_entry = 0 - self.next_entry = 0 - return self - - def __next__(self) -> tuple[LGDO, int, int]: - """Read next buffer_len entries and return lh5_table, iterator entry - and n_rows read.""" - buf, n_rows = self.read(self.next_entry) - self.next_entry = self.current_entry + n_rows - if n_rows == 0: - raise StopIteration - return (buf, self.current_entry, n_rows) - - @nb.njit(parallel=False, fastmath=True) def _make_fd_idx(starts, stops, idx): k = 0 From 4c930a77fc0d91c2ae20fdd88cbddd4093baf1e6 Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:19:47 +0100 Subject: [PATCH 03/13] Added other files --- src/lgdo/__init__.py | 4 +- src/lgdo/lgdo_utils.py | 212 ++++++++--------------------------------- 2 files changed, 43 insertions(+), 173 deletions(-) diff --git a/src/lgdo/__init__.py b/src/lgdo/__init__.py index 5e6eb7e0..25efb08d 100644 --- a/src/lgdo/__init__.py +++ b/src/lgdo/__init__.py @@ -66,11 +66,11 @@ "VectorOfVectors", "VectorOfEncodedVectors", "WaveformTable", - "LH5Iterator", - "LH5Store", "load_dfs", "load_nda", "ls", "show", + "LH5Iterator", + "LH5Store", "__version__", ] diff --git a/src/lgdo/lgdo_utils.py b/src/lgdo/lgdo_utils.py index 05b46bd5..14eeee1b 100644 --- a/src/lgdo/lgdo_utils.py +++ b/src/lgdo/lgdo_utils.py @@ -1,149 +1,53 @@ -"""Implements utilities for LEGEND Data Objects.""" from __future__ import annotations -import glob -import logging -import os -import string +from warnings import warn import numpy as np from . import types as lgdo +from .lh5 import utils -log = logging.getLogger(__name__) - -def get_element_type(obj: object) -> str: - """Get the LGDO element type of a scalar or array. - - For use in LGDO datatype attributes. 
- - Parameters - ---------- - obj - if a ``str``, will automatically return ``string`` if the object has - a :class:`numpy.dtype`, that will be used for determining the element - type otherwise will attempt to case the type of the object to a - :class:`numpy.dtype`. - - Returns - ------- - element_type - A string stating the determined element type of the object. - """ - - # special handling for strings - if isinstance(obj, str): - return "string" - - # the rest use dtypes - dt = obj.dtype if hasattr(obj, "dtype") else np.dtype(type(obj)) - kind = dt.kind - - if kind == "b": - return "bool" - if kind == "V": - return "blob" - if kind in ["i", "u", "f"]: - return "real" - if kind == "c": - return "complex" - if kind in ["S", "U"]: - return "string" - - # couldn't figure it out - raise ValueError( - "cannot determine lgdo element_type for object of type", type(obj).__name__ +def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> None: + warn( + "lgdo_utils.copy will soon be removed and will be replaced soon with copy member functions of each LGDO data type.", + DeprecationWarning, + stacklevel=2, ) + return utils.copy(obj, dtype) -def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> lgdo.LGDO: - """Return a copy of an LGDO. - - Parameters - ---------- - obj - the LGDO to be copied. - dtype - NumPy dtype to be used for the copied object. - - """ - if dtype is None: - dtype = obj.dtype - - if isinstance(obj, lgdo.Array): - return lgdo.Array( - np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs) - ) - - if isinstance(obj, lgdo.VectorOfVectors): - return lgdo.VectorOfVectors( - flattened_data=copy(obj.flattened_data, dtype=dtype), - cumulative_length=copy(obj.cumulative_length), - attrs=dict(obj.attrs), - ) - - else: - raise ValueError(f"copy of {type(obj)} not supported") +def get_element_type(obj: object) -> str: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.get_element_type(obj) def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]]: - """Parse datatype string and return type, dimensions and elements. - - Parameters - ---------- - datatype - a LGDO-formatted datatype string. - - Returns - ------- - element_type - the datatype name dims if not ``None``, a tuple of dimensions for the - LGDO. Note this is not the same as the NumPy shape of the underlying - data object. See the LGDO specification for more information. Also see - :class:`~.types.ArrayOfEqualSizedArrays` and - :meth:`.lh5_store.LH5Store.read_object` for example code elements for - numeric objects, the element type for struct-like objects, the list of - fields in the struct. - """ - if "{" not in datatype: - return "scalar", None, datatype - - # for other datatypes, need to parse the datatype string - from parse import parse - - datatype, element_description = parse("{}{{{}}}", datatype) - if datatype.endswith(">"): - datatype, dims = parse("{}<{}>", datatype) - dims = [int(i) for i in dims.split(",")] - return datatype, tuple(dims), element_description - else: - return datatype, None, element_description.split(",") + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. 
" + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.parse_datatype(datatype) def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: - """Expand (environment) variables. - - Note - ---- - Malformed variable names and references to non-existing variables are left - unchanged. - - Parameters - ---------- - expr - string expression, which may include (environment) variables prefixed by - ``$``. - substitute - use this dictionary to substitute variables. Takes precedence over - environment variables. - """ - if substitute is None: - substitute = {} - - # use provided mapping - # then expand env variables - return os.path.expandvars(string.Template(expr).safe_substitute(substitute)) + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.expand_vars(expr, substitute) def expand_path( @@ -152,45 +56,11 @@ def expand_path( list: bool = False, base_path: str = None, ) -> str | list: - """Expand (environment) variables and wildcards to return absolute paths. - - Parameters - ---------- - path - name of path, which may include environment variables and wildcards. - list - if ``True``, return a list. If ``False``, return a string; if ``False`` - and a unique file is not found, raise an exception. - substitute - use this dictionary to substitute variables. Environment variables take - precedence. - base_path - name of base path. Returned paths will be relative to base. - - Returns - ------- - path or list of paths - Unique absolute path, or list of all absolute paths - """ - if base_path is not None and base_path != "": - base_path = os.path.expanduser(os.path.expandvars(base_path)) - path = os.path.join(base_path, path) - - # first expand variables - _path = expand_vars(path, substitute) - - # then expand wildcards - paths = sorted(glob.glob(os.path.expanduser(_path))) - - if base_path is not None and base_path != "": - paths = [os.path.relpath(p, base_path) for p in paths] - - if not list: - if len(paths) == 0: - raise FileNotFoundError(f"could not find path matching {path}") - elif len(paths) > 1: - raise FileNotFoundError(f"found multiple paths matching {path}") - else: - return paths[0] - else: - return paths + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. 
" + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return utils.expand_path(path, substitute, list, base_path) From 76e892b2b01c17d9f2b426559ed4d395700d9a6c Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:20:20 +0100 Subject: [PATCH 04/13] Added other files --- src/lgdo/lh5/__init__.py | 18 +++ src/lgdo/lh5/iterator.py | 310 +++++++++++++++++++++++++++++++++++++++ src/lgdo/lh5/utils.py | 118 +++++++++++++++ src/lgdo/lh5_store.py | 190 ++++++++++++++++++++++++ src/lgdo/utils.py | 84 +++++++++++ 5 files changed, 720 insertions(+) create mode 100644 src/lgdo/lh5/__init__.py create mode 100644 src/lgdo/lh5/iterator.py create mode 100644 src/lgdo/lh5/utils.py create mode 100644 src/lgdo/lh5_store.py create mode 100644 src/lgdo/utils.py diff --git a/src/lgdo/lh5/__init__.py b/src/lgdo/lh5/__init__.py new file mode 100644 index 00000000..252a7cd2 --- /dev/null +++ b/src/lgdo/lh5/__init__.py @@ -0,0 +1,18 @@ +"""Routines from reading and writing LEGEND Data Objects in HDF5 files. +Currently the primary on-disk format for LGDO object is LEGEND HDF5 (LH5) files. IO +is done via the class :class:`.store.LH5Store`. LH5 files can also be +browsed easily in python like any `HDF5 `_ file using +`h5py `_. +""" + +from .iterator import Iterator +from .store import Store, load_dfs, load_nda, ls, show + +__all__ = [ + "Iterator", + "Store", + "load_dfs", + "load_nda", + "ls", + "show", +] diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py new file mode 100644 index 00000000..e56f6d8d --- /dev/null +++ b/src/lgdo/lh5/iterator.py @@ -0,0 +1,310 @@ +from __future__ import annotations + +import logging +import typing as typing + +import numpy as np +import pandas as pd + +from ..types import Array, Scalar, Struct, VectorOfVectors +from .store import Store +from .utils import expand_path + +LGDO = typing.Union[Array, Scalar, Struct, VectorOfVectors] + + +class Iterator(typing.Iterator): + """ + A class for iterating through one or more LH5 files, one block of entries + at a time. This also accepts an entry list/mask to enable event selection, + and a field mask. + + This class can be used either for random access: + + >>> lh5_obj, n_rows = lh5_it.read(entry) + + to read the block of entries starting at entry. In case of multiple files + or the use of an event selection, entry refers to a global event index + across files and does not count events that are excluded by the selection. + + This can also be used as an iterator: + + >>> for lh5_obj, entry, n_rows in Iterator(...): + >>> # do the thing! + + This is intended for if you are reading a large quantity of data but + want to limit your memory usage (particularly when reading in waveforms!). + The ``lh5_obj`` that is read by this class is reused in order to avoid + reallocation of memory; this means that if you want to hold on to data + between reads, you will have to copy it somewhere! + """ + + def __init__( + self, + lh5_files: str | list[str], + groups: str | list[str], + base_path: str = "", + entry_list: list[int] | list[list[int]] = None, + entry_mask: list[bool] | list[list[bool]] = None, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + buffer_len: int = 3200, + friend: typing.Iterator = None, + ) -> None: + """ + Parameters + ---------- + lh5_files + file or files to read from. May include wildcards and environment + variables. + groups + HDF5 group(s) to read. If a list is provided for both lh5_files + and group, they must be the same size. 
If a file is wild-carded, + the same group will be assigned to each file found + entry_list + list of entry numbers to read. If a nested list is provided, + expect one top-level list for each file, containing a list of + local entries. If a list of ints is provided, use global entries. + entry_mask + mask of entries to read. If a list of arrays is provided, expect + one for each file. Ignore if a selection list is provided. + field_mask + mask of which fields to read. See :meth:`Store.read` for + more details. + buffer_len + number of entries to read at a time while iterating through files. + friend + a ''friend'' Iterator that will be read in parallel with this. + The friend should have the same length and entry list. A single + LH5 table containing columns from both iterators will be returned. + """ + self.lh5_st = Store(base_path=base_path, keep_open=True) + + # List of files, with wildcards and env vars expanded + if isinstance(lh5_files, str): + lh5_files = [lh5_files] + if isinstance(groups, list): + lh5_files *= len(groups) + elif not isinstance(lh5_files, list): + raise ValueError("lh5_files must be a string or list of strings") + + if isinstance(groups, str): + groups = [groups] * len(lh5_files) + elif not isinstance(groups, list): + raise ValueError("group must be a string or list of strings") + + if not len(groups) == len(lh5_files): + raise ValueError("lh5_files and groups must have same length") + + self.lh5_files = [] + self.groups = [] + for f, g in zip(lh5_files, groups): + f_exp = expand_path(f, list=True, base_path=base_path) + self.lh5_files += f_exp + self.groups += [g] * len(f_exp) + + if entry_list is not None and entry_mask is not None: + raise ValueError( + "entry_list and entry_mask arguments are mutually exclusive" + ) + + # Map to last row in each file + self.file_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") + # Map to last iterator entry for each file + self.entry_map = np.full(len(self.lh5_files), np.iinfo("i").max, "i") + self.buffer_len = buffer_len + + if len(self.lh5_files) > 0: + f = self.lh5_files[0] + g = self.groups[0] + self.lh5_buffer = self.lh5_st.get_buffer( + g, + f, + size=self.buffer_len, + field_mask=field_mask, + ) + self.file_map[0] = self.lh5_st.read_n_rows(g, f) + else: + raise RuntimeError(f"can't open any files from {lh5_files}") + + self.n_rows = 0 + self.current_entry = 0 + self.next_entry = 0 + + self.field_mask = field_mask + + # List of entry indices from each file + self.local_entry_list = None + self.global_entry_list = None + if entry_list is not None: + entry_list = list(entry_list) + if isinstance(entry_list[0], int): + self.local_entry_list = [None] * len(self.file_map) + self.global_entry_list = np.array(entry_list, "i") + self.global_entry_list.sort() + + else: + self.local_entry_list = [[]] * len(self.file_map) + for i_file, local_list in enumerate(entry_list): + self.local_entry_list[i_file] = np.array(local_list, "i") + self.local_entry_list[i_file].sort() + + elif entry_mask is not None: + # Convert entry mask into an entry list + if isinstance(entry_mask, pd.Series): + entry_mask = entry_mask.values + if isinstance(entry_mask, np.ndarray): + self.local_entry_list = [None] * len(self.file_map) + self.global_entry_list = np.nonzero(entry_mask)[0] + else: + self.local_entry_list = [[]] * len(self.file_map) + for i_file, local_mask in enumerate(entry_mask): + self.local_entry_list[i_file] = np.nonzero(local_mask)[0] + + # Attach the friend + if friend is not None: + if not isinstance(friend, typing.Iterator): + 
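                # typing.Iterator aliases collections.abc.Iterator at runtime,
                # so this check accepts any iterator object, not only this
                # Iterator class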
raise ValueError("Friend must be an Iterator") + self.lh5_buffer.join(friend.lh5_buffer) + self.friend = friend + + def _get_file_cumlen(self, i_file: int) -> int: + """Helper to get cumulative file length of file""" + if i_file < 0: + return 0 + fcl = self.file_map[i_file] + if fcl == np.iinfo("i").max: + fcl = self._get_file_cumlen(i_file - 1) + self.lh5_st.read_n_rows( + self.groups[i_file], self.lh5_files[i_file] + ) + self.file_map[i_file] = fcl + return fcl + + def _get_file_cumentries(self, i_file: int) -> int: + """Helper to get cumulative iterator entries in file""" + if i_file < 0: + return 0 + n = self.entry_map[i_file] + if n == np.iinfo("i").max: + elist = self.get_file_entrylist(i_file) + fcl = self._get_file_cumlen(i_file) + if elist is None: + # no entry list provided + n = fcl + else: + file_entries = self.get_file_entrylist(i_file) + n = len(file_entries) + # check that file entries fall inside of file + if n > 0 and file_entries[-1] >= fcl: + logging.warning(f"Found entries out of range for file {i_file}") + n = np.searchsorted(file_entries, fcl, "right") + n += self._get_file_cumentries(i_file - 1) + self.entry_map[i_file] = n + return n + + def get_file_entrylist(self, i_file: int) -> np.ndarray: + """Helper to get entry list for file""" + # If no entry list is provided + if self.local_entry_list is None: + return None + + elist = self.local_entry_list[i_file] + if elist is None: + # Get local entrylist for this file from global entry list + f_start = self._get_file_cumlen(i_file - 1) + f_end = self._get_file_cumlen(i_file) + i_start = self._get_file_cumentries(i_file - 1) + i_stop = np.searchsorted(self.global_entry_list, f_end, "right") + elist = np.array(self.global_entry_list[i_start:i_stop], "i") - f_start + self.local_entry_list[i_file] = elist + return elist + + def get_global_entrylist(self) -> np.ndarray: + """Get global entry list, constructing it if needed""" + if self.global_entry_list is None and self.local_entry_list is not None: + self.global_entry_list = np.zeros(len(self), "i") + for i_file in range(len(self.lh5_files)): + i_start = self.get_file_cumentries(i_file - 1) + i_stop = self.get_file_cumentries(i_file) + f_start = self.get_file_cumlen(i_file - 1) + self.global_entry_list[i_start:i_stop] = ( + self.get_file_entrylist(i_file) + f_start + ) + return self.global_entry_list + + def read(self, entry: int) -> tuple[LGDO, int]: + """Read the nextlocal chunk of events, starting at entry. 
Return the + LH5 buffer and number of rows read.""" + self.n_rows = 0 + i_file = np.searchsorted(self.entry_map, entry, "right") + + # if file hasn't been opened yet, search through files + # sequentially until we find the right one + if i_file < len(self.lh5_files) and self.entry_map[i_file] == np.iinfo("i").max: + while i_file < len(self.lh5_files) and entry >= self._get_file_cumentries( + i_file + ): + i_file += 1 + + if i_file == len(self.lh5_files): + return (self.lh5_buffer, self.n_rows) + local_entry = entry - self._get_file_cumentries(i_file - 1) + + while self.n_rows < self.buffer_len and i_file < len(self.file_map): + # Loop through files + local_idx = self.get_file_entrylist(i_file) + if local_idx is not None and len(local_idx) == 0: + i_file += 1 + local_entry = 0 + continue + + i_local = local_idx[local_entry] if local_idx is not None else local_entry + self.lh5_buffer, n_rows = self.lh5_st.read( + self.groups[i_file], + self.lh5_files[i_file], + start_row=i_local, + n_rows=self.buffer_len - self.n_rows, + idx=local_idx, + field_mask=self.field_mask, + obj_buf=self.lh5_buffer, + obj_buf_start=self.n_rows, + ) + + self.n_rows += n_rows + i_file += 1 + local_entry = 0 + + self.current_entry = entry + + if self.friend is not None: + self.friend.read(entry) + + return (self.lh5_buffer, self.n_rows) + + def reset_field_mask(self, mask): + """Replaces the field mask of this iterator and any friends with mask""" + self.field_mask = mask + if self.friend is not None: + self.friend.reset_field_mask(mask) + + def __len__(self) -> int: + """Return the total number of entries.""" + return ( + self._get_file_cumentries(len(self.lh5_files) - 1) + if len(self.entry_map) > 0 + else 0 + ) + + def __iter__(self) -> typing.Iterator: + """Loop through entries in blocks of size buffer_len.""" + self.current_entry = 0 + self.next_entry = 0 + return self + + def __next__(self) -> tuple[LGDO, int, int]: + """Read next buffer_len entries and return lh5_table, iterator entry + and n_rows read.""" + buf, n_rows = self.read(self.next_entry) + self.next_entry = self.current_entry + n_rows + if n_rows == 0: + raise StopIteration + return (buf, self.current_entry, n_rows) diff --git a/src/lgdo/lh5/utils.py b/src/lgdo/lh5/utils.py new file mode 100644 index 00000000..bc1fd425 --- /dev/null +++ b/src/lgdo/lh5/utils.py @@ -0,0 +1,118 @@ +"""Implements utilities for LEGEND Data Objects.""" +from __future__ import annotations + +import glob +import logging +import os +import string + +log = logging.getLogger(__name__) + + +def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]]: + """Parse datatype string and return type, dimensions and elements. + + Parameters + ---------- + datatype + a LGDO-formatted datatype string. + + Returns + ------- + element_type + the datatype name dims if not ``None``, a tuple of dimensions for the + LGDO. Note this is not the same as the NumPy shape of the underlying + data object. See the LGDO specification for more information. Also see + :class:`~.types.ArrayOfEqualSizedArrays` and + :meth:`.lh5_store.LH5Store.read` for example code elements for + numeric objects, the element type for struct-like objects, the list of + fields in the struct. 
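
    Examples (mirroring cases from this package's test suite):

    >>> parse_datatype("array<1>{bool}")
    ('array', (1,), 'bool')
    >>> parse_datatype("table{col1,col2,coln}")
    ('table', None, ['col1', 'col2', 'coln'])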
+ """ + if "{" not in datatype: + return "scalar", None, datatype + + # for other datatypes, need to parse the datatype string + from parse import parse + + datatype, element_description = parse("{}{{{}}}", datatype) + if datatype.endswith(">"): + datatype, dims = parse("{}<{}>", datatype) + dims = [int(i) for i in dims.split(",")] + return datatype, tuple(dims), element_description + else: + return datatype, None, element_description.split(",") + + +def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: + """Expand (environment) variables. + + Note + ---- + Malformed variable names and references to non-existing variables are left + unchanged. + + Parameters + ---------- + expr + string expression, which may include (environment) variables prefixed by + ``$``. + substitute + use this dictionary to substitute variables. Takes precedence over + environment variables. + """ + if substitute is None: + substitute = {} + + # use provided mapping + # then expand env variables + return os.path.expandvars(string.Template(expr).safe_substitute(substitute)) + + +def expand_path( + path: str, + substitute: dict[str, str] = None, + list: bool = False, + base_path: str = None, +) -> str | list: + """Expand (environment) variables and wildcards to return absolute paths. + + Parameters + ---------- + path + name of path, which may include environment variables and wildcards. + list + if ``True``, return a list. If ``False``, return a string; if ``False`` + and a unique file is not found, raise an exception. + substitute + use this dictionary to substitute variables. Environment variables take + precedence. + base_path + name of base path. Returned paths will be relative to base. + + Returns + ------- + path or list of paths + Unique absolute path, or list of all absolute paths + """ + if base_path is not None and base_path != "": + base_path = os.path.expanduser(os.path.expandvars(base_path)) + path = os.path.join(base_path, path) + + # first expand variables + _path = expand_vars(path, substitute) + + # then expand wildcards + paths = sorted(glob.glob(os.path.expanduser(_path))) + + if base_path is not None and base_path != "": + paths = [os.path.relpath(p, base_path) for p in paths] + + if not list: + if len(paths) == 0: + raise FileNotFoundError(f"could not find path matching {path}") + elif len(paths) > 1: + raise FileNotFoundError(f"found multiple paths matching {path}") + else: + return paths[0] + else: + return paths diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5_store.py new file mode 100644 index 00000000..a404a4f8 --- /dev/null +++ b/src/lgdo/lh5_store.py @@ -0,0 +1,190 @@ +from __future__ import annotations + +import sys +from typing import Iterator, Union +from warnings import warn + +import h5py +import numpy as np +import pandas as pd + +from . import lh5 +from .types import Array # noqa: F401 +from .types import ArrayOfEncodedEqualSizedArrays # noqa: F401 +from .types import ArrayOfEqualSizedArrays # noqa: F401 +from .types import FixedSizeArray # noqa: F401 +from .types import Scalar # noqa: F401 +from .types import Struct # noqa: F401 +from .types import Table # noqa: F401 +from .types import VectorOfEncodedVectors # noqa: F401 +from .types import VectorOfVectors # noqa: F401 +from .types import WaveformTable # noqa: F401 + +DEFAULT_HDF5_COMPRESSION = None +LGDO = Union[Array, Scalar, Struct, VectorOfVectors] +DEFAULT_HDF5_SETTINGS: dict[str, ...] 
= {"shuffle": True, "compression": "gzip"} + + +class LH5Iterator(lh5.Iterator): + def __init__( + self, + lh5_files: str | list[str], + groups: str | list[str], + base_path: str = "", + entry_list: list[int] | list[list[int]] = None, + entry_mask: list[bool] | list[list[bool]] = None, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + buffer_len: int = 3200, + friend: Iterator = None, + ) -> None: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__( + lh5_files, + groups, + base_path, + entry_list, + entry_mask, + field_mask, + buffer_len, + friend, + ) + + def write_object( + self, + obj: LGDO, + name: str, + lh5_file: str | h5py.File, + group: str | h5py.Group = "/", + start_row: int = 0, + n_rows: int = None, + wo_mode: str = "append", + write_start: int = 0, + **h5py_kwargs, + ) -> None: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + self.write( + obj, + name, + lh5_file, + group, + start_row, + n_rows, + wo_mode, + write_start, + h5py_kwargs, + ) + + def read_object( + self, + name: str, + lh5_file: str | h5py.File | list[str | h5py.File], + start_row: int = 0, + n_rows: int = sys.maxsize, + idx: np.ndarray | list | tuple | list[np.ndarray | list | tuple] = None, + field_mask: dict[str, bool] | list[str] | tuple[str] = None, + obj_buf: LGDO = None, + obj_buf_start: int = 0, + decompress: bool = True, + ) -> tuple[LGDO, int]: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return self.read( + name, + lh5_file, + start_row, + n_rows, + idx, + field_mask, + obj_buf, + obj_buf_start, + decompress, + ) + + +class LH5Store(lh5.Store): + def __init__(self, base_path: str = "", keep_open: bool = False): + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(base_path, keep_open) + + +def load_dfs( + f_list: str | list[str], + par_list: list[str], + lh5_group: str = "", + idx_list: list[np.ndarray | list | tuple] = None, +) -> pd.DataFrame: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return lh5.load_dfs(f_list, par_list, lh5_group, idx_list) + + +def load_nda( + f_list: str | list[str], + par_list: list[str], + lh5_group: str = "", + idx_list: list[np.ndarray | list | tuple] = None, +) -> dict[str, np.ndarray]: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. 
" + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return lh5.load_nda(f_list, par_list, lh5_group, idx_list) + + +def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + return lh5.ls(lh5_file, lh5_group) + + +def show( + lh5_file: str | h5py.Group, + lh5_group: str = "/", + attrs: bool = False, + indent: str = "", + header: bool = True, +) -> None: + warn( + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store will be removed in a future release.", + DeprecationWarning, + stacklevel=2, + ) + lh5.show(lh5_file, lh5_group, attrs, indent, header) diff --git a/src/lgdo/utils.py b/src/lgdo/utils.py new file mode 100644 index 00000000..22866a35 --- /dev/null +++ b/src/lgdo/utils.py @@ -0,0 +1,84 @@ +"""Implements utilities for LEGEND Data Objects.""" +from __future__ import annotations + +import logging + +import numpy as np + +from . import types as lgdo + +log = logging.getLogger(__name__) + + +def get_element_type(obj: object) -> str: + """Get the LGDO element type of a scalar or array. + + For use in LGDO datatype attributes. + + Parameters + ---------- + obj + if a ``str``, will automatically return ``string`` if the object has + a :class:`numpy.dtype`, that will be used for determining the element + type otherwise will attempt to case the type of the object to a + :class:`numpy.dtype`. + + Returns + ------- + element_type + A string stating the determined element type of the object. + """ + + # special handling for strings + if isinstance(obj, str): + return "string" + + # the rest use dtypes + dt = obj.dtype if hasattr(obj, "dtype") else np.dtype(type(obj)) + kind = dt.kind + + if kind == "b": + return "bool" + if kind == "V": + return "blob" + if kind in ["i", "u", "f"]: + return "real" + if kind == "c": + return "complex" + if kind in ["S", "U"]: + return "string" + + # couldn't figure it out + raise ValueError( + "cannot determine lgdo element_type for object of type", type(obj).__name__ + ) + + +def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> lgdo.LGDO: + """Return a copy of an LGDO. + + Parameters + ---------- + obj + the LGDO to be copied. + dtype + NumPy dtype to be used for the copied object. 
+ + """ + if dtype is None: + dtype = obj.dtype + + if isinstance(obj, lgdo.Array): + return lgdo.Array( + np.array(obj.nda, dtype=dtype, copy=True), attrs=dict(obj.attrs) + ) + + if isinstance(obj, lgdo.VectorOfVectors): + return lgdo.VectorOfVectors( + flattened_data=copy(obj.flattened_data, dtype=dtype), + cumulative_length=copy(obj.cumulative_length), + attrs=dict(obj.attrs), + ) + + else: + raise ValueError(f"copy of {type(obj)} not supported") From b55ae453a052e55e749ea8a678058668811d7bdf Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:25:50 +0100 Subject: [PATCH 05/13] Moved test files --- tests/compression/conftest.py | 6 +- tests/compression/test_radware_sigcompress.py | 7 +- tests/{ => lh5}/test_lh5_iterator.py | 0 tests/{ => lh5}/test_lh5_store.py | 0 tests/test_lgdo_utils.py | 71 +------------------ tests/types/test_array.py | 2 +- tests/types/test_vectorofvectors.py | 2 +- 7 files changed, 11 insertions(+), 77 deletions(-) rename tests/{ => lh5}/test_lh5_iterator.py (100%) rename tests/{ => lh5}/test_lh5_store.py (100%) diff --git a/tests/compression/conftest.py b/tests/compression/conftest.py index 927ba1ff..61f96f73 100644 --- a/tests/compression/conftest.py +++ b/tests/compression/conftest.py @@ -1,12 +1,12 @@ import pytest -from lgdo import LH5Store +import lgdo.lh5 as lh5 @pytest.fixture() def wftable(lgnd_test_data): - store = LH5Store() - wft, _ = store.read_object( + store = lh5.Store() + wft, _ = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) diff --git a/tests/compression/test_radware_sigcompress.py b/tests/compression/test_radware_sigcompress.py index aacf38f6..eb458a09 100644 --- a/tests/compression/test_radware_sigcompress.py +++ b/tests/compression/test_radware_sigcompress.py @@ -2,7 +2,8 @@ import numpy as np -from lgdo import ArrayOfEncodedEqualSizedArrays, ArrayOfEqualSizedArrays, LH5Store +import lgdo.lh5 as lh5 +from lgdo import ArrayOfEncodedEqualSizedArrays, ArrayOfEqualSizedArrays from lgdo.compression.radware import ( _get_hton_u16, _radware_sigcompress_decode, @@ -177,8 +178,8 @@ def test_aoesa(wftable): def test_performance(lgnd_test_data): - store = LH5Store() - obj, _ = store.read_object( + store = lh5.Store() + obj, _ = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), ) diff --git a/tests/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py similarity index 100% rename from tests/test_lh5_iterator.py rename to tests/lh5/test_lh5_iterator.py diff --git a/tests/test_lh5_store.py b/tests/lh5/test_lh5_store.py similarity index 100% rename from tests/test_lh5_store.py rename to tests/lh5/test_lh5_store.py diff --git a/tests/test_lgdo_utils.py b/tests/test_lgdo_utils.py index 49df91ca..ce86d971 100644 --- a/tests/test_lgdo_utils.py +++ b/tests/test_lgdo_utils.py @@ -1,9 +1,6 @@ -import os - import numpy as np -import pytest -import lgdo.lgdo_utils as lgdo_utils +import lgdo.utils as utils def test_get_element_type(): @@ -20,69 +17,5 @@ def test_get_element_type(): ] for obj, name in objs: - get_name = lgdo_utils.get_element_type(obj) + get_name = utils.get_element_type(obj) assert get_name == name - - -def test_parse_datatype(): - datatypes = [ - ("real", ("scalar", None, "real")), - ("array<1>{bool}", ("array", (1,), "bool")), - ("fixedsizearray<2>{real}", ("fixedsizearray", (2,), "real")), - ( - "arrayofequalsizedarrays<3,4>{complex}", - ("arrayofequalsizedarrays", (3, 4), "complex"), - ), - 
("array<1>{array<1>{blob}}", ("array", (1,), "array<1>{blob}")), - ( - "struct{field1,field2,fieldn}", - ("struct", None, ["field1", "field2", "fieldn"]), - ), - ("table{col1,col2,coln}", ("table", None, ["col1", "col2", "coln"])), - ] - - for string, dt_tuple in datatypes: - pd_dt_tuple = lgdo_utils.parse_datatype(string) - assert pd_dt_tuple == dt_tuple - - -def test_expand_vars(): - # Check env variable expansion - os.environ["PYGAMATESTBASEDIR"] = "a_random_string" - assert lgdo_utils.expand_vars("$PYGAMATESTBASEDIR/blah") == "a_random_string/blah" - - # Check user variable expansion - assert ( - lgdo_utils.expand_vars( - "$PYGAMATESTBASEDIR2/blah", - substitute={"PYGAMATESTBASEDIR2": "a_random_string"}, - ) - == "a_random_string/blah" - ) - - -def test_expand_path(lgnd_test_data): - files = [ - lgnd_test_data.get_path( - "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_dsp.lh5" - ), - lgnd_test_data.get_path( - "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012228Z-tier_dsp.lh5" - ), - ] - base_dir = os.path.dirname(files[0]) - - assert lgdo_utils.expand_path(f"{base_dir}/*20230318T012144Z*") == files[0] - - # Should fail if file not found - with pytest.raises(FileNotFoundError): - lgdo_utils.expand_path(f"{base_dir}/not_a_real_file.lh5") - - # Should fail if multiple files found - with pytest.raises(FileNotFoundError): - lgdo_utils.expand_path(f"{base_dir}/*.lh5") - - # Check if it finds a list of files correctly - assert sorted(lgdo_utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted( - files - ) diff --git a/tests/types/test_array.py b/tests/types/test_array.py index 0932c99b..df1bcd3c 100644 --- a/tests/types/test_array.py +++ b/tests/types/test_array.py @@ -1,6 +1,6 @@ import numpy as np -import lgdo.lgdo_utils as utils +import lgdo.utils as utils from lgdo import Array diff --git a/tests/types/test_vectorofvectors.py b/tests/types/test_vectorofvectors.py index 4126d119..71c20ea8 100644 --- a/tests/types/test_vectorofvectors.py +++ b/tests/types/test_vectorofvectors.py @@ -2,7 +2,7 @@ import pytest import lgdo -import lgdo.lgdo_utils as utils +import lgdo.utils as utils from lgdo import VectorOfVectors from lgdo.types import vectorofvectors as vov From 6c4acfc39353de3ccd1008857225f7ab612f7adb Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:26:42 +0100 Subject: [PATCH 06/13] Changed test files --- tests/lh5/test_lh5_iterator.py | 16 +- tests/lh5/test_lh5_store.py | 428 ++++++++++++++++----------------- tests/lh5/test_lh5_utils.py | 72 ++++++ 3 files changed, 285 insertions(+), 231 deletions(-) create mode 100644 tests/lh5/test_lh5_utils.py diff --git a/tests/lh5/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py index 09297665..a9b0419e 100644 --- a/tests/lh5/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -2,7 +2,7 @@ import pytest import lgdo -from lgdo.lh5_store import LH5Iterator +import lgdo.lh5 as lh5 @pytest.fixture(scope="module") @@ -11,7 +11,7 @@ def lgnd_file(lgnd_test_data): def test_basics(lgnd_file): - lh5_it = LH5Iterator( + lh5_it = lh5.Iterator( lgnd_file, "/geds/raw", entry_list=range(100), @@ -35,14 +35,14 @@ def test_basics(lgnd_file): def test_errors(lgnd_file): with pytest.raises(RuntimeError): - LH5Iterator("non-existent-file.lh5", "random-group") + lh5.Iterator("non-existent-file.lh5", "random-group") with pytest.raises(ValueError): - LH5Iterator(1, 2) + lh5.Iterator(1, 2) def test_lgnd_waveform_table_fancy_idx(lgnd_file): - lh5_it = 
LH5Iterator( + lh5_it = lh5.Iterator( lgnd_file, "geds/raw/waveform", entry_list=[ @@ -97,13 +97,13 @@ def more_lgnd_files(lgnd_test_data): def test_friend(more_lgnd_files): - lh5_raw_it = LH5Iterator( + lh5_raw_it = lh5.Iterator( more_lgnd_files[0], "ch1084803/raw", field_mask=["waveform", "baseline"], buffer_len=5, ) - lh5_it = LH5Iterator( + lh5_it = lh5.Iterator( more_lgnd_files[1], "ch1084803/hit", field_mask=["is_valid_0vbb"], @@ -121,7 +121,7 @@ def test_friend(more_lgnd_files): def test_iterate(more_lgnd_files): # iterate through all hit groups in all files; there are 10 entries in # each group/file - lh5_it = LH5Iterator( + lh5_it = lh5.Iterator( more_lgnd_files[1] * 3, ["ch1084803/hit"] * 2 + ["ch1084804/hit"] * 2 + ["ch1121600/hit"] * 2, field_mask=["is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"], diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 25491660..4743dc0d 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -7,10 +7,11 @@ import pytest import lgdo -import lgdo.lh5_store as lh5 +import lgdo.lh5 as lh5 +import lgdo.types as types from lgdo import compression from lgdo.compression import RadwareSigcompress -from lgdo.lh5_store import DEFAULT_HDF5_SETTINGS, LH5Store +from lgdo.lh5.store import DEFAULT_HDF5_SETTINGS @pytest.fixture(scope="module") @@ -19,11 +20,11 @@ def lgnd_file(lgnd_test_data): def test_init(): - LH5Store() + lh5.Store() def test_gimme_file(lgnd_file): - store = LH5Store(keep_open=True) + store = lh5.Store(keep_open=True) f = store.gimme_file(lgnd_file) assert isinstance(f, h5py.File) @@ -35,7 +36,7 @@ def test_gimme_file(lgnd_file): def test_gimme_group(lgnd_file, tmptestdir): f = h5py.File(lgnd_file) - store = LH5Store() + store = lh5.Store() g = store.gimme_group("/geds", f) assert isinstance(g, h5py.Group) @@ -44,12 +45,6 @@ def test_gimme_group(lgnd_file, tmptestdir): assert isinstance(g, h5py.Group) -def test_show(lgnd_file): - lh5.show(lgnd_file) - lh5.show(lgnd_file, "/geds/raw") - lh5.show(lgnd_file, "geds/raw") - - def test_ls(lgnd_file): assert lh5.ls(lgnd_file) == ["geds"] assert lh5.ls(lgnd_file, "/*/raw") == ["geds/raw"] @@ -68,6 +63,12 @@ def test_ls(lgnd_file): ] +def test_show(lgnd_file): + lh5.show(lgnd_file) + lh5.show(lgnd_file, "/geds/raw") + lh5.show(lgnd_file, "geds/raw") + + def test_load_nda(lgnd_file): nda = lh5.load_nda( [lgnd_file, lgnd_file], @@ -83,49 +84,38 @@ def test_load_nda(lgnd_file): assert nda["waveform/values"].shape == (6, 5592) -def test_load_dfs(lgnd_file): - dfs = lh5.load_dfs( - [lgnd_file, lgnd_file], - ["baseline", "waveform/t0"], - lh5_group="/geds/raw", - idx_list=[[1, 3, 5], [2, 6, 7]], - ) - - assert isinstance(dfs, pd.DataFrame) - - @pytest.fixture(scope="module") def lh5_file(tmptestdir): - store = LH5Store() + store = lh5.Store() struct = lgdo.Struct() struct.add_field("scalar", lgdo.Scalar(value=10, attrs={"sth": 1})) - struct.add_field("array", lgdo.Array(nda=np.array([1, 2, 3, 4, 5]))) + struct.add_field("array", types.Array(nda=np.array([1, 2, 3, 4, 5]))) struct.add_field( "aoesa", - lgdo.ArrayOfEqualSizedArrays(shape=(5, 5), dtype=np.float32, fill_val=42), + types.ArrayOfEqualSizedArrays(shape=(5, 5), dtype=np.float32, fill_val=42), ) struct.add_field( "vov", - lgdo.VectorOfVectors( - flattened_data=lgdo.Array( + types.VectorOfVectors( + flattened_data=types.Array( nda=np.array([1, 2, 3, 4, 5, 2, 4, 8, 9, 7, 5, 3, 1]) ), - cumulative_length=lgdo.Array(nda=np.array([2, 5, 6, 10, 13])), + cumulative_length=types.Array(nda=np.array([2, 5, 6, 10, 
13])), attrs={"myattr": 2}, ), ) struct.add_field( "voev", - lgdo.VectorOfEncodedVectors( - encoded_data=lgdo.VectorOfVectors( - flattened_data=lgdo.Array( + types.VectorOfEncodedVectors( + encoded_data=types.VectorOfVectors( + flattened_data=types.Array( nda=np.array([1, 2, 3, 4, 5, 2, 4, 8, 9, 7, 5, 3, 1]) ), - cumulative_length=lgdo.Array(nda=np.array([2, 5, 6, 10, 13])), + cumulative_length=types.Array(nda=np.array([2, 5, 6, 10, 13])), ), - decoded_size=lgdo.Array(shape=5, fill_val=6), + decoded_size=types.Array(shape=5, fill_val=6), ), ) @@ -142,14 +132,14 @@ def lh5_file(tmptestdir): ), } - struct.add_field("table", lgdo.Table(col_dict=col_dict, attrs={"stuff": 5})) + struct.add_field("table", types.Table(col_dict=col_dict, attrs={"stuff": 5})) struct.add_field( "wftable", - lgdo.WaveformTable( - t0=lgdo.Array(np.zeros(10)), - dt=lgdo.Array(np.full(10, fill_value=1)), - values=lgdo.ArrayOfEqualSizedArrays( + types.WaveformTable( + t0=types.Array(np.zeros(10)), + dt=types.Array(np.full(10, fill_value=1)), + values=types.ArrayOfEqualSizedArrays( shape=(10, 1000), dtype=np.uint16, fill_val=100, attrs={"custom": 8} ), ), @@ -157,16 +147,16 @@ def lh5_file(tmptestdir): struct.add_field( "wftable_enc", - lgdo.WaveformTable( - t0=lgdo.Array(np.zeros(10)), - dt=lgdo.Array(np.full(10, fill_value=1)), + types.WaveformTable( + t0=types.Array(np.zeros(10)), + dt=types.Array(np.full(10, fill_value=1)), values=compression.encode( struct["wftable"].values, codec=RadwareSigcompress(codec_shift=-32768) ), ), ) - store.write_object( + store.write( struct, "struct", f"{tmptestdir}/tmp-pygama-lgdo-types.lh5", @@ -176,7 +166,7 @@ def lh5_file(tmptestdir): wo_mode="overwrite_file", ) - store.write_object( + store.write( struct, "struct_full", f"{tmptestdir}/tmp-pygama-lgdo-types.lh5", @@ -194,7 +184,7 @@ def test_write_objects(lh5_file): def test_read_n_rows(lh5_file): - store = LH5Store() + store = lh5.Store() assert store.read_n_rows("/data/struct_full/aoesa", lh5_file) == 5 assert store.read_n_rows("/data/struct_full/array", lh5_file) == 5 assert store.read_n_rows("/data/struct_full/scalar", lh5_file) is None @@ -206,14 +196,14 @@ def test_read_n_rows(lh5_file): def test_get_buffer(lh5_file): - store = LH5Store() + store = lh5.Store() buf = store.get_buffer("/data/struct_full/wftable_enc", lh5_file) - assert isinstance(buf.values, lgdo.ArrayOfEqualSizedArrays) + assert isinstance(buf.values, types.ArrayOfEqualSizedArrays) def test_read_scalar(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/scalar", lh5_file) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/scalar", lh5_file) assert isinstance(lh5_obj, lgdo.Scalar) assert lh5_obj.value == 10 assert n_rows == 1 @@ -223,9 +213,9 @@ def test_read_scalar(lh5_file): def test_read_array(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/array", lh5_file) - assert isinstance(lh5_obj, lgdo.Array) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/array", lh5_file) + assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda == np.array([2, 3, 4])).all() assert n_rows == 3 with h5py.File(lh5_file) as h5f: @@ -236,19 +226,17 @@ def test_read_array(lh5_file): def test_read_array_fancy_idx(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object( - "/data/struct_full/array", lh5_file, idx=[0, 3, 4] - ) - assert isinstance(lh5_obj, lgdo.Array) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct_full/array", lh5_file, idx=[0, 3, 4]) + 
assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda == np.array([1, 4, 5])).all() assert n_rows == 3 def test_read_vov(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/vov", lh5_file) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/vov", lh5_file) + assert isinstance(lh5_obj, types.VectorOfVectors) desired = [np.array([3, 4, 5]), np.array([2]), np.array([4, 8, 9, 7])] @@ -270,9 +258,9 @@ def test_read_vov(lh5_file): def test_read_vov_fancy_idx(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct_full/vov", lh5_file, idx=[0, 2]) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct_full/vov", lh5_file, idx=[0, 2]) + assert isinstance(lh5_obj, types.VectorOfVectors) desired = [np.array([1, 2]), np.array([2])] @@ -283,9 +271,9 @@ def test_read_vov_fancy_idx(lh5_file): def test_read_voev(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/voev", lh5_file, decompress=False) - assert isinstance(lh5_obj, lgdo.VectorOfEncodedVectors) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/voev", lh5_file, decompress=False) + assert isinstance(lh5_obj, types.VectorOfEncodedVectors) desired = [np.array([3, 4, 5]), np.array([2]), np.array([4, 8, 9, 7])] @@ -294,10 +282,10 @@ def test_read_voev(lh5_file): assert n_rows == 3 - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct/voev", [lh5_file, lh5_file], decompress=False ) - assert isinstance(lh5_obj, lgdo.VectorOfEncodedVectors) + assert isinstance(lh5_obj, types.VectorOfEncodedVectors) assert n_rows == 6 with h5py.File(lh5_file) as h5f: @@ -313,11 +301,11 @@ def test_read_voev(lh5_file): def test_read_voev_fancy_idx(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object( + store = lh5.Store() + lh5_obj, n_rows = store.read( "/data/struct_full/voev", lh5_file, idx=[0, 2], decompress=False ) - assert isinstance(lh5_obj, lgdo.VectorOfEncodedVectors) + assert isinstance(lh5_obj, types.VectorOfEncodedVectors) desired = [np.array([1, 2]), np.array([2])] @@ -328,27 +316,27 @@ def test_read_voev_fancy_idx(lh5_file): def test_read_aoesa(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/aoesa", lh5_file) - assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/aoesa", lh5_file) + assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) assert (lh5_obj.nda == np.full((3, 5), fill_value=42)).all() def test_read_table(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/table", lh5_file) - assert isinstance(lh5_obj, lgdo.Table) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) + assert isinstance(lh5_obj, types.Table) assert n_rows == 3 - lh5_obj, n_rows = store.read_object("/data/struct/table", [lh5_file, lh5_file]) + lh5_obj, n_rows = store.read("/data/struct/table", [lh5_file, lh5_file]) assert n_rows == 6 assert lh5_obj.attrs["stuff"] == 5 assert lh5_obj["a"].attrs["attr"] == 9 def test_read_hdf5_compressed_data(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/table", lh5_file) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) assert "compression" not in lh5_obj["b"].attrs with h5py.File(lh5_file) as h5f: @@ -363,12 +351,12 @@ def 
test_read_hdf5_compressed_data(lh5_file): def test_read_wftable(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/data/struct/wftable", lh5_file) - assert isinstance(lh5_obj, lgdo.WaveformTable) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/wftable", lh5_file) + assert isinstance(lh5_obj, types.WaveformTable) assert n_rows == 3 - lh5_obj, n_rows = store.read_object("/data/struct/wftable", [lh5_file, lh5_file]) + lh5_obj, n_rows = store.read("/data/struct/wftable", [lh5_file, lh5_file]) assert n_rows == 6 assert lh5_obj.values.attrs["custom"] == 8 @@ -388,32 +376,30 @@ def test_read_wftable(lh5_file): def test_read_wftable_encoded(lh5_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object( - "/data/struct/wftable_enc", lh5_file, decompress=False - ) - assert isinstance(lh5_obj, lgdo.WaveformTable) - assert isinstance(lh5_obj.values, lgdo.ArrayOfEncodedEqualSizedArrays) + store = lh5.Store() + lh5_obj, n_rows = store.read("/data/struct/wftable_enc", lh5_file, decompress=False) + assert isinstance(lh5_obj, types.WaveformTable) + assert isinstance(lh5_obj.values, types.ArrayOfEncodedEqualSizedArrays) assert n_rows == 3 assert lh5_obj.values.attrs["codec"] == "radware_sigcompress" assert "codec_shift" in lh5_obj.values.attrs - lh5_obj, n_rows = store.read_object("/data/struct/wftable_enc/values", lh5_file) + lh5_obj, n_rows = store.read("/data/struct/wftable_enc/values", lh5_file) assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) assert n_rows == 3 - lh5_obj, n_rows = store.read_object("/data/struct/wftable_enc", lh5_file) + lh5_obj, n_rows = store.read("/data/struct/wftable_enc", lh5_file) assert isinstance(lh5_obj, lgdo.WaveformTable) assert isinstance(lh5_obj.values, lgdo.ArrayOfEqualSizedArrays) assert n_rows == 3 - lh5_obj_chain, n_rows = store.read_object( + lh5_obj_chain, n_rows = store.read( "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=False ) assert n_rows == 6 assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEncodedEqualSizedArrays) - lh5_obj_chain, n_rows = store.read_object( + lh5_obj_chain, n_rows = store.read( "/data/struct/wftable_enc", [lh5_file, lh5_file], decompress=True ) assert isinstance(lh5_obj_chain.values, lgdo.ArrayOfEqualSizedArrays) @@ -440,24 +426,22 @@ def test_read_wftable_encoded(lh5_file): def test_read_with_field_mask(lh5_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object( - "/data/struct_full", lh5_file, field_mask=["array"] - ) + lh5_obj, n_rows = store.read("/data/struct_full", lh5_file, field_mask=["array"]) assert list(lh5_obj.keys()) == ["array"] - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct_full", lh5_file, field_mask=("array", "table") ) assert list(lh5_obj.keys()) == ["array", "table"] - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct_full", lh5_file, field_mask={"array": True} ) assert list(lh5_obj.keys()) == ["array"] - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/data/struct_full", lh5_file, field_mask={"vov": False, "voev": False} ) assert list(lh5_obj.keys()) == [ @@ -471,45 +455,45 @@ def test_read_with_field_mask(lh5_file): def test_read_lgnd_array(lgnd_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object("/geds/raw/baseline", lgnd_file) - assert isinstance(lh5_obj, lgdo.Array) + lh5_obj, n_rows = store.read("/geds/raw/baseline", lgnd_file) + assert isinstance(lh5_obj, types.Array) assert n_rows == 
100 assert len(lh5_obj) == 100 - lh5_obj, n_rows = store.read_object("/geds/raw/waveform/values", lgnd_file) - assert isinstance(lh5_obj, lgdo.ArrayOfEqualSizedArrays) + lh5_obj, n_rows = store.read("/geds/raw/waveform/values", lgnd_file) + assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) def test_read_lgnd_array_fancy_idx(lgnd_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/baseline", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] ) - assert isinstance(lh5_obj, lgdo.Array) + assert isinstance(lh5_obj, types.Array) assert n_rows == 7 assert len(lh5_obj) == 7 assert (lh5_obj.nda == [13508, 14353, 14525, 14341, 15079, 11675, 13995]).all() def test_read_lgnd_vov(lgnd_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object("/geds/raw/tracelist", lgnd_file) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + lh5_obj, n_rows = store.read("/geds/raw/tracelist", lgnd_file) + assert isinstance(lh5_obj, types.VectorOfVectors) assert n_rows == 100 assert len(lh5_obj) == 100 def test_read_lgnd_vov_fancy_idx(lgnd_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/tracelist", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] ) - assert isinstance(lh5_obj, lgdo.VectorOfVectors) + assert isinstance(lh5_obj, types.VectorOfVectors) assert n_rows == 7 assert len(lh5_obj) == 7 assert (lh5_obj.cumulative_length.nda == [1, 2, 3, 4, 5, 6, 7]).all() @@ -517,20 +501,20 @@ def test_read_lgnd_vov_fancy_idx(lgnd_file): def test_read_array_concatenation(lgnd_file): - store = LH5Store() - lh5_obj, n_rows = store.read_object("/geds/raw/baseline", [lgnd_file, lgnd_file]) - assert isinstance(lh5_obj, lgdo.Array) + store = lh5.Store() + lh5_obj, n_rows = store.read("/geds/raw/baseline", [lgnd_file, lgnd_file]) + assert isinstance(lh5_obj, types.Array) assert n_rows == 200 assert len(lh5_obj) == 200 def test_read_lgnd_waveform_table(lgnd_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object("/geds/raw/waveform", lgnd_file) - assert isinstance(lh5_obj, lgdo.WaveformTable) + lh5_obj, n_rows = store.read("/geds/raw/waveform", lgnd_file) + assert isinstance(lh5_obj, types.WaveformTable) - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/waveform", lgnd_file, start_row=10, @@ -538,29 +522,29 @@ def test_read_lgnd_waveform_table(lgnd_file): field_mask=["t0", "dt"], ) - assert isinstance(lh5_obj, lgdo.Table) + assert isinstance(lh5_obj, types.Table) assert list(lh5_obj.keys()) == ["t0", "dt"] assert len(lh5_obj) == 10 def test_read_lgnd_waveform_table_fancy_idx(lgnd_file): - store = LH5Store() + store = lh5.Store() - lh5_obj, n_rows = store.read_object( + lh5_obj, n_rows = store.read( "/geds/raw/waveform", lgnd_file, idx=[7, 9, 25, 27, 33, 38, 46, 52, 57, 59, 67, 71, 72, 82, 90, 92, 93, 94, 97], ) - assert isinstance(lh5_obj, lgdo.WaveformTable) + assert isinstance(lh5_obj, types.WaveformTable) assert len(lh5_obj) == 19 @pytest.fixture(scope="module") def enc_lgnd_file(lgnd_file, tmptestdir): - store = LH5Store() - wft, n_rows = store.read_object("/geds/raw/waveform", lgnd_file) + store = lh5.Store() + wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) wft.values.attrs["compression"] = RadwareSigcompress(codec_shift=-32768) - store.write_object( + store.write( wft, "/geds/raw/waveform", f"{tmptestdir}/tmp-pygama-compressed-wfs.lh5", @@ -574,16 +558,16 @@ def 
test_write_compressed_lgnd_waveform_table(enc_lgnd_file): def test_read_compressed_lgnd_waveform_table(lgnd_file, enc_lgnd_file): - store = LH5Store() - wft, _ = store.read_object("/geds/raw/waveform", enc_lgnd_file) - assert isinstance(wft.values, lgdo.ArrayOfEqualSizedArrays) + store = lh5.Store() + wft, _ = store.read("/geds/raw/waveform", enc_lgnd_file) + assert isinstance(wft.values, types.ArrayOfEqualSizedArrays) assert "compression" not in wft.values.attrs def test_write_with_hdf5_compression(lgnd_file, tmptestdir): - store = LH5Store() - wft, n_rows = store.read_object("/geds/raw/waveform", lgnd_file) - store.write_object( + store = lh5.Store() + wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) + store.write( wft, "/geds/raw/waveform", f"{tmptestdir}/tmp-pygama-hdf5-compressed-wfs.lh5", @@ -597,7 +581,7 @@ def test_write_with_hdf5_compression(lgnd_file, tmptestdir): assert h5f["/geds/raw/waveform/values"].compression_opts == 9 assert h5f["/geds/raw/waveform/values"].shuffle is True - store.write_object( + store.write( wft, "/geds/raw/waveform", f"{tmptestdir}/tmp-pygama-hdf5-compressed-wfs.lh5", @@ -618,13 +602,13 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"): os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset1": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset1": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") - store.write_object( + store = lh5.Store() + store.write(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( tb2, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -637,9 +621,7 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir): ] # Now, check that the data were overwritten - tb_dat, _ = store.read_object( - "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) + tb_dat, _ = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") assert np.array_equal(tb_dat["dset1"].nda, np.ones(10)) @@ -651,13 +633,13 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"): os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") - store.write_object( + store = lh5.Store() + store.write(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( tb2, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -665,9 +647,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): ) # Now, try to overwrite with a different field # Now, check that the data were overwritten - tb_dat, _ = store.read_object( - "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) + tb_dat, _ = store.read("my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") assert 
np.array_equal(tb_dat["dset2"].nda, np.ones(10))

     # Also make sure that the first table's fields aren't lurking around the lh5 file!
@@ -678,18 +658,18 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir):
     if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"):
         os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5")

-    tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))})
-    tb2 = lh5.Table(
-        col_dict={"dset2": lh5.Array(np.ones(10))}
+    tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))})
+    tb2 = types.Table(
+        col_dict={"dset2": types.Array(np.ones(10))}
     )  # Same field name, different values
-    store = LH5Store()
-    store.write_object(
+    store = lh5.Store()
+    store.write(
         tb1,
         "my_table",
         f"{tmptestdir}/write_object_overwrite_test.lh5",
         group="my_group",
     )
-    store.write_object(
+    store.write(
         tb2,
         "my_table",
         f"{tmptestdir}/write_object_overwrite_test.lh5",
@@ -698,7 +678,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir):
     )  # Now, try to overwrite with a different field

     # Now, check that the data were overwritten
-    tb_dat, _ = store.read_object(
+    tb_dat, _ = store.read(
         "my_group/my_table", f"{tmptestdir}/write_object_overwrite_test.lh5"
     )
     assert np.array_equal(tb_dat["dset2"].nda, np.ones(10))
@@ -713,11 +693,11 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
     caplog.set_level(logging.DEBUG)
     caplog.clear()

-    # Start with an lgdo.WaveformTable
+    # Start with a types.WaveformTable
     if os.path.exists(f"{tmptestdir}/write_object_overwrite_test.lh5"):
         os.remove(f"{tmptestdir}/write_object_overwrite_test.lh5")

-    tb1 = lh5.WaveformTable(
+    tb1 = types.WaveformTable(
         t0=np.zeros(10),
         t0_units="ns",
         dt=np.zeros(10),
@@ -725,7 +705,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
         values=np.zeros((10, 10)),
         values_units="ADC",
     )
-    tb2 = lh5.WaveformTable(
+    tb2 = types.WaveformTable(
         t0=np.ones(10),
         t0_units="ns",
         dt=np.ones(10),
@@ -733,14 +713,14 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
         values=np.ones((10, 10)),
         values_units="ADC",
     )  # Same field name, different values
-    store = LH5Store()
-    store.write_object(
+    store = lh5.Store()
+    store.write(
         tb1,
         "my_table",
         f"{tmptestdir}/write_object_overwrite_test.lh5",
         group="my_group",
     )
-    store.write_object(
+    store.write(
         tb2,
         "my_table",
         f"{tmptestdir}/write_object_overwrite_test.lh5",
@@ -754,19 +734,17 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
     ]

     # Now, check that the data were overwritten
-    tb_dat, _ = store.read_object(
+    tb_dat, _ = store.read(
         "my_group/my_table", f"{tmptestdir}/write_object_overwrite_test.lh5"
     )
     assert np.array_equal(tb_dat["values"].nda, np.ones((10, 10)))

     # Now try overwriting an array, and test the write_start argument
-    array1 = lh5.Array(nda=np.zeros(10))
-    array2 = lh5.Array(nda=np.ones(20))
-    store = LH5Store()
-    store.write_object(
-        array1, "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5"
-    )
-    store.write_object(
+    array1 = types.Array(nda=np.zeros(10))
+    array2 = types.Array(nda=np.ones(20))
+    store = lh5.Store()
+    store.write(array1, "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5")
+    store.write(
         array2,
         "my_array",
         f"{tmptestdir}/write_object_overwrite_test.lh5",
@@ -775,7 +753,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir):
     )

     # Now, check that the data were overwritten
-    array_dat, _ = store.read_object(
+    array_dat, _ = store.read(
         "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5"
     )
     expected_out_array = 
np.append(np.zeros(5), np.ones(20)) @@ -783,13 +761,11 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): assert np.array_equal(array_dat.nda, expected_out_array) # Now try overwriting a scalar - scalar1 = lh5.Scalar(0) - scalar2 = lh5.Scalar(1) - store = LH5Store() - store.write_object( - scalar1, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) - store.write_object( + scalar1 = types.Scalar(0) + scalar2 = types.Scalar(1) + store = lh5.Store() + store.write(scalar1, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( scalar2, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -797,20 +773,18 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): ) # Now, check that the data were overwritten - scalar_dat, _ = store.read_object( + scalar_dat, _ = store.read( "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5" ) assert scalar_dat.value == 1 # Finally, try overwriting a vector of vectors - vov1 = lh5.VectorOfVectors(listoflists=[np.zeros(1), np.ones(2), np.zeros(3)]) - vov2 = lh5.VectorOfVectors(listoflists=[np.ones(1), np.zeros(2), np.ones(3)]) - store = LH5Store() - store.write_object( - vov1, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5" - ) - store.write_object( + vov1 = types.VectorOfVectors(listoflists=[np.zeros(1), np.ones(2), np.zeros(3)]) + vov2 = types.VectorOfVectors(listoflists=[np.ones(1), np.zeros(2), np.ones(3)]) + store = lh5.Store() + store.write(vov1, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5") + store.write( vov2, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5", @@ -818,7 +792,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): write_start=1, ) # start overwriting the second list of lists - vector_dat, _ = store.read_object( + vector_dat, _ = store.read( "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5" ) @@ -832,14 +806,12 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - array1 = lh5.Array(np.zeros(10)) - tb1 = lh5.Table(col_dict={"dset1`": lh5.Array(np.ones(10))}) - store = LH5Store() - store.write_object( - array1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5" - ) + array1 = types.Array(np.zeros(10)) + tb1 = types.Table(col_dict={"dset1`": types.Array(np.ones(10))}) + store = lh5.Store() + store.write(array1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(RuntimeError) as exc_info: - store.write_object( + store.write( tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -855,18 +827,19 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - tb1 = lh5.Table( - col_dict={"dset1": lh5.Array(np.zeros(10)), "dset2": lh5.Array(np.zeros(10))} + tb1 = types.Table( + col_dict={ + "dset1": types.Array(np.zeros(10)), + "dset2": types.Array(np.zeros(10)), + } ) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = LH5Store() - store.write_object( - tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5" - ) + store = lh5.Store() + store.write(tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(ValueError) as exc_info: - 
store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -883,16 +856,14 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(20))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(20))} ) # different field name, different size - store = LH5Store() - store.write_object( - tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5" - ) + store = lh5.Store() + store.write(tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(ValueError) as exc_info: - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -909,18 +880,18 @@ def test_write_object_append_column(tmptestdir): if os.path.exists(f"{tmptestdir}/write_object_append_column_test.lh5"): os.remove(f"{tmptestdir}/write_object_append_column_test.lh5") - tb1 = lh5.Table(col_dict={"dset1": lh5.Array(np.zeros(10))}) - tb2 = lh5.Table( - col_dict={"dset2": lh5.Array(np.ones(10))} + tb1 = types.Table(col_dict={"dset1": types.Array(np.zeros(10))}) + tb2 = types.Table( + col_dict={"dset2": types.Array(np.ones(10))} ) # different field name, different size - store = LH5Store() - store.write_object( + store = lh5.Store() + store.write( tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", group="my_group", ) - store.write_object( + store.write( tb2, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5", @@ -929,9 +900,20 @@ def test_write_object_append_column(tmptestdir): ) # Now, check that the data were appended - tb_dat, _ = store.read_object( + tb_dat, _ = store.read( "my_group/my_table", f"{tmptestdir}/write_object_append_column_test.lh5" ) - assert isinstance(tb_dat, lgdo.Table) + assert isinstance(tb_dat, types.Table) assert np.array_equal(tb_dat["dset1"].nda, np.zeros(10)) assert np.array_equal(tb_dat["dset2"].nda, np.ones(10)) + + +def test_load_dfs(lgnd_file): + dfs = lh5.load_dfs( + [lgnd_file, lgnd_file], + ["baseline", "waveform/t0"], + lh5_group="/geds/raw", + idx_list=[[1, 3, 5], [2, 6, 7]], + ) + + assert isinstance(dfs, pd.DataFrame) diff --git a/tests/lh5/test_lh5_utils.py b/tests/lh5/test_lh5_utils.py new file mode 100644 index 00000000..c83dd9a9 --- /dev/null +++ b/tests/lh5/test_lh5_utils.py @@ -0,0 +1,72 @@ +import os + +import pytest + +import lgdo.lh5.utils as utils + + +@pytest.fixture(scope="module") +def lgnd_file(lgnd_test_data): + return lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5") + + +def test_parse_datatype(): + datatypes = [ + ("real", ("scalar", None, "real")), + ("array<1>{bool}", ("array", (1,), "bool")), + ("fixedsizearray<2>{real}", ("fixedsizearray", (2,), "real")), + ( + "arrayofequalsizedarrays<3,4>{complex}", + ("arrayofequalsizedarrays", (3, 4), "complex"), + ), + ("array<1>{array<1>{blob}}", ("array", (1,), "array<1>{blob}")), + ( + "struct{field1,field2,fieldn}", + ("struct", None, ["field1", "field2", "fieldn"]), + ), + ("table{col1,col2,coln}", ("table", None, ["col1", "col2", "coln"])), + ] + + for string, dt_tuple in datatypes: + pd_dt_tuple = utils.parse_datatype(string) + assert pd_dt_tuple == dt_tuple + + +def test_expand_vars(): + # Check env variable expansion + 
os.environ["PYGAMATESTBASEDIR"] = "a_random_string" + assert utils.expand_vars("$PYGAMATESTBASEDIR/blah") == "a_random_string/blah" + + # Check user variable expansion + assert ( + utils.expand_vars( + "$PYGAMATESTBASEDIR2/blah", + substitute={"PYGAMATESTBASEDIR2": "a_random_string"}, + ) + == "a_random_string/blah" + ) + + +def test_expand_path(lgnd_test_data): + files = [ + lgnd_test_data.get_path( + "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012144Z-tier_dsp.lh5" + ), + lgnd_test_data.get_path( + "lh5/prod-ref-l200/generated/tier/dsp/cal/p03/r001/l200-p03-r001-cal-20230318T012228Z-tier_dsp.lh5" + ), + ] + base_dir = os.path.dirname(files[0]) + + assert utils.expand_path(f"{base_dir}/*20230318T012144Z*") == files[0] + + # Should fail if file not found + with pytest.raises(FileNotFoundError): + utils.expand_path(f"{base_dir}/not_a_real_file.lh5") + + # Should fail if multiple files found + with pytest.raises(FileNotFoundError): + utils.expand_path(f"{base_dir}/*.lh5") + + # Check if it finds a list of files correctly + assert sorted(utils.expand_path(f"{base_dir}/*.lh5", list=True)) == sorted(files) From 063fdb9c324131ea28137519c0c19ed1bca6a173 Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 11:36:35 +0100 Subject: [PATCH 07/13] Everything adjusted and passes tests --- src/lgdo/types/array.py | 2 +- src/lgdo/types/arrayofequalsizedarrays.py | 2 +- src/lgdo/types/encoded.py | 2 +- src/lgdo/types/scalar.py | 2 +- src/lgdo/types/vectorofvectors.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lgdo/types/array.py b/src/lgdo/types/array.py index 30a47bd2..54fd76f3 100644 --- a/src/lgdo/types/array.py +++ b/src/lgdo/types/array.py @@ -10,7 +10,7 @@ import numpy as np -from .. import lgdo_utils as utils +from .. import utils as utils from .lgdo import LGDO log = logging.getLogger(__name__) diff --git a/src/lgdo/types/arrayofequalsizedarrays.py b/src/lgdo/types/arrayofequalsizedarrays.py index 95884bc9..bf16ed8d 100644 --- a/src/lgdo/types/arrayofequalsizedarrays.py +++ b/src/lgdo/types/arrayofequalsizedarrays.py @@ -9,7 +9,7 @@ import numpy as np -from .. import lgdo_utils as utils +from .. import utils as utils from . import vectorofvectors as vov from .array import Array diff --git a/src/lgdo/types/encoded.py b/src/lgdo/types/encoded.py index 68886273..766001b3 100644 --- a/src/lgdo/types/encoded.py +++ b/src/lgdo/types/encoded.py @@ -6,7 +6,7 @@ import numpy as np from numpy.typing import NDArray -from .. import lgdo_utils as utils +from .. import utils as utils from .array import Array from .lgdo import LGDO from .scalar import Scalar diff --git a/src/lgdo/types/scalar.py b/src/lgdo/types/scalar.py index 6b793137..e79bb932 100644 --- a/src/lgdo/types/scalar.py +++ b/src/lgdo/types/scalar.py @@ -7,7 +7,7 @@ import numpy as np -from .. import lgdo_utils as utils +from .. import utils as utils from .lgdo import LGDO log = logging.getLogger(__name__) diff --git a/src/lgdo/types/vectorofvectors.py b/src/lgdo/types/vectorofvectors.py index 7d227a52..2b0d7f13 100644 --- a/src/lgdo/types/vectorofvectors.py +++ b/src/lgdo/types/vectorofvectors.py @@ -13,7 +13,7 @@ import numpy as np from numpy.typing import DTypeLike, NDArray -from .. import lgdo_utils as utils +from .. import utils as utils from . 
import arrayofequalsizedarrays as aoesa
 from .array import Array
 from .lgdo import LGDO

From e1ac27919587ee3c4681eb0a09e62457f53f808c Mon Sep 17 00:00:00 2001
From: Neuberger
Date: Fri, 24 Nov 2023 11:57:08 +0100
Subject: [PATCH 08/13] Also updated the docs again

---
 docs/source/extensions/numbadoc.py          |  2 +-
 docs/source/notebooks/DataCompression.ipynb | 44 ++++++++++-----------
 docs/source/notebooks/LH5Files.ipynb        | 22 +++++------
 3 files changed, 31 insertions(+), 37 deletions(-)

diff --git a/docs/source/extensions/numbadoc.py b/docs/source/extensions/numbadoc.py
index 5b4202a1..a49b4a90 100644
--- a/docs/source/extensions/numbadoc.py
+++ b/docs/source/extensions/numbadoc.py
@@ -27,7 +27,7 @@ def import_object(self) -> bool:
         """
         success = super().import_object()
         if success:
-            # Store away numba wrapper
+            # LH5Store away numba wrapper
             self.jitobj = self.object
             # And bend references to underlying python function
             if hasattr(self.object, "py_func"):
diff --git a/docs/source/notebooks/DataCompression.ipynb b/docs/source/notebooks/DataCompression.ipynb
index fad9c9bc..dc6636ca 100644
--- a/docs/source/notebooks/DataCompression.ipynb
+++ b/docs/source/notebooks/DataCompression.ipynb
@@ -61,8 +61,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "store = lgdo.LH5Store()\n",
-    "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
+    "store = lgdo.lh5.Store()\n",
+    "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
     "lgdo.show(\"data.lh5\")"
   ]
  },
@@ -110,7 +110,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS"
+    "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS"
   ]
  },
 {
@@ -131,18 +131,18 @@
    "outputs": [],
    "source": [
     "# use another built-in filter\n",
-    "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n",
+    "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"lzf\"}\n",
     "\n",
     "# specify filter name and options\n",
-    "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n",
+    "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": \"gzip\", \"compression_opts\": 7}\n",
     "\n",
     "# specify a registered filter provided by hdf5plugin\n",
     "import hdf5plugin\n",
     "\n",
-    "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n",
+    "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"compression\": hdf5plugin.Blosc()}\n",
     "\n",
     "# shuffle bytes before compressing (typically better compression ratio with no performance penalty)\n",
-    "lgdo.lh5_store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}"
+    "lgdo.lh5.store.DEFAULT_HDF5_SETTINGS = {\"shuffle\": True, \"compression\": \"lzf\"}"
   ]
  },
@@ -166,7 +166,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
+    "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
     "show_h5ds_opts(\"data/col1\")"
   ]
  },
@@ -175,7 +175,7 @@
   "id": "f597a9e2",
   "metadata": {},
   "source": [
-    "Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write_object()`. They will be forwarded as is, overriding default settings."
+    "Nice. Shuffling bytes before compressing significantly reduced size on disk. Last but not least, `create_dataset()` keyword arguments can be passed to `write()`. They will be forwarded as is, overriding default settings."
] }, { @@ -185,9 +185,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.write_object(\n", - " data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\"\n", - ")\n", + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", shuffle=True, compression=\"gzip\")\n", "show_h5ds_opts(\"data/col1\")" ] }, @@ -207,7 +205,7 @@ "outputs": [], "source": [ "data[\"col2\"].attrs[\"hdf5_settings\"] = {\"compression\": \"gzip\"}\n", - "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", "\n", "show_h5ds_opts(\"data/col1\")\n", "show_h5ds_opts(\"data/col2\")" @@ -221,7 +219,7 @@ "We are now storing table columns with different compression settings.\n", "\n", "
\n", - "**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write_object()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n", + "**Note:** since any [h5py.Group.create_dataset()](https://docs.h5py.org/en/stable/high/group.html#h5py.Group.create_dataset) keyword argument can be used in `write()` or set in the `hdf5_settings` attribute, other HDF5 dataset settings can be configured, like the chunk size.\n", "
" ] }, @@ -232,7 +230,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.write_object(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)" + "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\", chunks=2)" ] }, { @@ -257,7 +255,7 @@ "from legendtestdata import LegendTestData\n", "\n", "ldata = LegendTestData()\n", - "wfs, n_rows = store.read_object(\n", + "wfs, n_rows = store.read(\n", " \"geds/raw/waveform\",\n", " ldata.get_path(\"lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5\"),\n", ")\n", @@ -347,7 +345,7 @@ " t0=wfs.t0,\n", " dt=wfs.dt,\n", ")\n", - "store.write_object(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n", + "store.write(enc_wfs, \"waveforms\", \"data.lh5\", wo_mode=\"o\")\n", "lgdo.show(\"data.lh5\", attrs=True)" ] }, @@ -372,7 +370,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, _ = store.read_object(\"waveforms\", \"data.lh5\")\n", + "obj, _ = store.read(\"waveforms\", \"data.lh5\")\n", "obj.values" ] }, @@ -391,7 +389,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n", + "obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n", "obj.values" ] }, @@ -433,9 +431,9 @@ "from lgdo.compression import ULEB128ZigZagDiff\n", "\n", "wfs.values.attrs[\"compression\"] = ULEB128ZigZagDiff()\n", - "store.write_object(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n", + "store.write(wfs, \"waveforms\", \"data.lh5\", wo_mode=\"of\")\n", "\n", - "obj, _ = store.read_object(\"waveforms\", \"data.lh5\", decompress=False)\n", + "obj, _ = store.read(\"waveforms\", \"data.lh5\", decompress=False)\n", "obj.values.attrs[\"codec\"]" ] }, @@ -447,8 +445,8 @@ "Further reading:\n", "\n", "- [Available waveform compression algorithms](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.compression.html)\n", - "- [read_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.read_object)\n", - "- [write_object() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write_object)" + "- [read() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.store.LH5Store.read)\n", + "- [write() docstring](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Store.write)" ] } ], diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb index 8563f4bd..12383d1e 100644 --- a/docs/source/notebooks/LH5Files.ipynb +++ b/docs/source/notebooks/LH5Files.ipynb @@ -38,7 +38,7 @@ "id": "c136b537", "metadata": {}, "source": [ - "We can use `lgdo.lh5_store.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.ls) to inspect the file contents:" + "We can use `lgdo.lh5.ls()` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.ls) to inspect the file contents:" ] }, { @@ -131,7 +131,7 @@ "metadata": {}, "outputs": [], "source": [ - "store.read_object(\"geds/raw\", lh5_file)" + "store.read(\"geds/raw\", lh5_file)" ] }, { @@ -149,7 +149,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file)\n", + "obj, n_rows = store.read(\"geds/raw/timestamp\", lh5_file)\n", "obj" ] }, @@ -170,7 +170,7 @@ "metadata": {}, "outputs": [], "source": [ - "obj, n_rows = store.read_object(\"geds/raw/timestamp\", lh5_file, start_row=15, n_rows=10)\n", + "obj, n_rows = store.read(\"geds/raw/timestamp\", 
lh5_file, start_row=15, n_rows=10)\n",
     "print(obj)"
   ]
  },
@@ -189,7 +189,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "obj, n_rows = store.read_object(\n",
+    "obj, n_rows = store.read(\n",
     "    \"geds/raw\", lh5_file, field_mask=(\"timestamp\", \"energy\"), idx=[1, 3, 7, 9, 10, 15]\n",
     ")\n",
     "print(obj)"
   ]
  },
@@ -200,7 +200,7 @@
   "id": "b3f52d77",
   "metadata": {},
   "source": [
-    "As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
+    "As you might have noticed, `read()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:"
   ]
  },
@@ -260,9 +260,7 @@
   "source": [
    "store = LH5Store()\n",
    "\n",
-    "store.write_object(\n",
-    "    scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\"\n",
-    ")"
+    "store.write(scalar, name=\"message\", lh5_file=\"my_objects.lh5\", wo_mode=\"overwrite_file\")"
   ]
  },
@@ -300,10 +298,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "store.write_object(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
-    "store.write_object(\n",
-    "    wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\"\n",
-    ")\n",
+    "store.write(array, name=\"numbers\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
+    "store.write(wf_table, name=\"waveforms\", group=\"closet\", lh5_file=\"my_objects.lh5\")\n",
     "show(\"my_objects.lh5\")"
   ]
  },

From c6a82c61436b104c89747fbb4f7bda66bb992da1 Mon Sep 17 00:00:00 2001
From: Neuberger
Date: Fri, 24 Nov 2023 12:28:21 +0100
Subject: [PATCH 09/13] Renamed Store and Iterator back to LH5Store and
 LH5Iterator

---
 docs/source/notebooks/DataCompression.ipynb   |  2 +-
 src/lgdo/compression/radware.py               |  2 +-
 src/lgdo/lh5/__init__.py                      |  8 +-
 src/lgdo/lh5/iterator.py                      | 12 +--
 src/lgdo/lh5/store.py                         | 10 +-
 src/lgdo/lh5_store.py                         | 32 ++++----
 tests/compression/conftest.py                 |  2 +-
 tests/compression/test_radware_sigcompress.py |  2 +-
 tests/lh5/test_lh5_iterator.py                | 14 ++--
 tests/lh5/test_lh5_store.py                   | 80 +++++++++----------
 10 files changed, 82 insertions(+), 82 deletions(-)

diff --git a/docs/source/notebooks/DataCompression.ipynb b/docs/source/notebooks/DataCompression.ipynb
index dc6636ca..36b69b1b 100644
--- a/docs/source/notebooks/DataCompression.ipynb
+++ b/docs/source/notebooks/DataCompression.ipynb
@@ -61,7 +61,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "store = lgdo.lh5.Store()\n",
+    "store = lgdo.lh5.LH5Store()\n",
     "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n",
     "lgdo.show(\"data.lh5\")"
   ]
diff --git a/src/lgdo/compression/radware.py b/src/lgdo/compression/radware.py
index 0332dfb7..58bce975 100644
--- a/src/lgdo/compression/radware.py
+++ b/src/lgdo/compression/radware.py
@@ -387,7 +387,7 @@ def _radware_sigcompress_encode(
     changes:

     - Shift the input signal by `shift` before encoding.
-    - Store encoded, :class:`numpy.uint16` signal as an array of bytes
+    - LH5Store encoded, :class:`numpy.uint16` signal as an array of bytes
       (:class:`numpy.ubyte`), in big-endian ordering.
     - Declare mask globally to avoid extra memory allocation. 
- Enable hardware-vectorization with Numba (:func:`numba.guvectorize`). diff --git a/src/lgdo/lh5/__init__.py b/src/lgdo/lh5/__init__.py index 252a7cd2..6263372a 100644 --- a/src/lgdo/lh5/__init__.py +++ b/src/lgdo/lh5/__init__.py @@ -5,12 +5,12 @@ `h5py `_. """ -from .iterator import Iterator -from .store import Store, load_dfs, load_nda, ls, show +from .iterator import LH5Iterator +from .store import LH5Store, load_dfs, load_nda, ls, show __all__ = [ - "Iterator", - "Store", + "LH5Iterator", + "LH5Store", "load_dfs", "load_nda", "ls", diff --git a/src/lgdo/lh5/iterator.py b/src/lgdo/lh5/iterator.py index e56f6d8d..534a7c05 100644 --- a/src/lgdo/lh5/iterator.py +++ b/src/lgdo/lh5/iterator.py @@ -7,13 +7,13 @@ import pandas as pd from ..types import Array, Scalar, Struct, VectorOfVectors -from .store import Store +from .store import LH5Store from .utils import expand_path LGDO = typing.Union[Array, Scalar, Struct, VectorOfVectors] -class Iterator(typing.Iterator): +class LH5Iterator(typing.Iterator): """ A class for iterating through one or more LH5 files, one block of entries at a time. This also accepts an entry list/mask to enable event selection, @@ -29,7 +29,7 @@ class Iterator(typing.Iterator): This can also be used as an iterator: - >>> for lh5_obj, entry, n_rows in Iterator(...): + >>> for lh5_obj, entry, n_rows in LH5Iterator(...): >>> # do the thing! This is intended for if you are reading a large quantity of data but @@ -68,16 +68,16 @@ def __init__( mask of entries to read. If a list of arrays is provided, expect one for each file. Ignore if a selection list is provided. field_mask - mask of which fields to read. See :meth:`Store.read` for + mask of which fields to read. See :meth:`LH5Store.read` for more details. buffer_len number of entries to read at a time while iterating through files. friend - a ''friend'' Iterator that will be read in parallel with this. + a ''friend'' LH5Iterator that will be read in parallel with this. The friend should have the same length and entry list. A single LH5 table containing columns from both iterators will be returned. """ - self.lh5_st = Store(base_path=base_path, keep_open=True) + self.lh5_st = LH5Store(base_path=base_path, keep_open=True) # List of files, with wildcards and env vars expanded if isinstance(lh5_files, str): diff --git a/src/lgdo/lh5/store.py b/src/lgdo/lh5/store.py index c0588aea..3c2aa696 100644 --- a/src/lgdo/lh5/store.py +++ b/src/lgdo/lh5/store.py @@ -42,15 +42,15 @@ DEFAULT_HDF5_COMPRESSION = None -class Store: +class LH5Store: """ Class to represent a store of LEGEND HDF5 files. The two main methods implemented by the class are :meth:`read` and :meth:`write`. 
Examples
     --------
-    >>> from lgdo import Store
-    >>> store = Store()
+    >>> from lgdo import LH5Store
+    >>> store = LH5Store()
     >>> obj, _ = store.read("/geds/waveform", "file.lh5")
     >>> type(obj)
     lgdo.waveform_table.WaveformTable
@@ -1309,7 +1309,7 @@ def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]:
         + ("" if lh5_group == "" else f" (and group {lh5_group})")
     )

-    lh5_st = Store()
+    lh5_st = LH5Store()
     # To use recursively, make lh5_file a h5group instead of a string
     if isinstance(lh5_file, str):
         lh5_file = lh5_st.gimme_file(lh5_file, "r")
@@ -1473,7 +1473,7 @@ def load_nda(
     # Expand wildcards
     f_list = [f for f_wc in f_list for f in sorted(glob.glob(os.path.expandvars(f_wc)))]

-    sto = Store()
+    sto = LH5Store()
     par_data = {par: [] for par in par_list}
     for ii, f in enumerate(f_list):
         f = sto.gimme_file(f, "r")
diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5_store.py
index a404a4f8..aee38b94 100644
--- a/src/lgdo/lh5_store.py
+++ b/src/lgdo/lh5_store.py
@@ -25,7 +25,7 @@
 DEFAULT_HDF5_SETTINGS: dict[str, ...] = {"shuffle": True, "compression": "gzip"}


-class LH5Iterator(lh5.Iterator):
+class LH5Iterator(lh5.LH5Iterator):
     def __init__(
         self,
         lh5_files: str | list[str],
@@ -38,8 +38,8 @@ def __init__(
         friend: Iterator = None,
     ) -> None:
         warn(
-            "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. "
-            "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. "
+            "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. "
+            "We recommend using 'import lgdo.lh5 as lh5' and then creating an LH5Iterator instance via 'lh5.LH5Iterator()'. "
             "lgdo.lh5_store will be removed in a future release.",
             DeprecationWarning,
             stacklevel=2,
@@ -68,9 +68,9 @@ def write_object(
     **h5py_kwargs,
 ) -> None:
     warn(
-        "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. "
-        "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. "
-        "lgdo.lh5_store will be removed in a future release.",
+        "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. "
+        "The object you are calling this function from uses the old LH5Iterator class. "
+        "We recommend using 'import lgdo.lh5 as lh5' and then creating an instance via 'lh5.LH5Iterator()'.",
         DeprecationWarning,
         stacklevel=2,
     )
@@ -99,9 +99,9 @@ def read_object(
     decompress: bool = True,
 ) -> tuple[LGDO, int]:
     warn(
-        "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. "
-        "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. "
-        "lgdo.lh5_store will be removed in a future release.",
+        "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. "
+        "The object you are calling this function from uses the old LH5Iterator class. "
+        "We recommend using 'import lgdo.lh5 as lh5' and then creating an LH5Store instance via 'lh5.LH5Store()'.",
         DeprecationWarning,
         stacklevel=2,
     )
@@ -118,11 +118,11 @@ def read_object(
     )


-class LH5Store(lh5.Store):
+class LH5Store(lh5.LH5Store):
     def __init__(self, base_path: str = "", keep_open: bool = False):
         warn(
-            "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. "
-            "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. "
+            "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. 
" + "We recommend using 'import lgdo.lh5 as lh5' and then creating an instance via 'lh5.LH5Iterator()'.", "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, @@ -137,7 +137,7 @@ def load_dfs( idx_list: list[np.ndarray | list | tuple] = None, ) -> pd.DataFrame: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, @@ -153,7 +153,7 @@ def load_nda( idx_list: list[np.ndarray | list | tuple] = None, ) -> dict[str, np.ndarray]: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, @@ -164,7 +164,7 @@ def load_nda( def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, @@ -181,7 +181,7 @@ def show( header: bool = True, ) -> None: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing Store and Iterator. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. 
" "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, diff --git a/tests/compression/conftest.py b/tests/compression/conftest.py index 61f96f73..e69cc307 100644 --- a/tests/compression/conftest.py +++ b/tests/compression/conftest.py @@ -5,7 +5,7 @@ @pytest.fixture() def wftable(lgnd_test_data): - store = lh5.Store() + store = lh5.LH5Store() wft, _ = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), diff --git a/tests/compression/test_radware_sigcompress.py b/tests/compression/test_radware_sigcompress.py index eb458a09..fe0bdd99 100644 --- a/tests/compression/test_radware_sigcompress.py +++ b/tests/compression/test_radware_sigcompress.py @@ -178,7 +178,7 @@ def test_aoesa(wftable): def test_performance(lgnd_test_data): - store = lh5.Store() + store = lh5.LH5Store() obj, _ = store.read( "/geds/raw/waveform", lgnd_test_data.get_path("lh5/LDQTA_r117_20200110T105115Z_cal_geds_raw.lh5"), diff --git a/tests/lh5/test_lh5_iterator.py b/tests/lh5/test_lh5_iterator.py index a9b0419e..95e575af 100644 --- a/tests/lh5/test_lh5_iterator.py +++ b/tests/lh5/test_lh5_iterator.py @@ -11,7 +11,7 @@ def lgnd_file(lgnd_test_data): def test_basics(lgnd_file): - lh5_it = lh5.Iterator( + lh5_it = lh5.LH5Iterator( lgnd_file, "/geds/raw", entry_list=range(100), @@ -35,14 +35,14 @@ def test_basics(lgnd_file): def test_errors(lgnd_file): with pytest.raises(RuntimeError): - lh5.Iterator("non-existent-file.lh5", "random-group") + lh5.LH5Iterator("non-existent-file.lh5", "random-group") with pytest.raises(ValueError): - lh5.Iterator(1, 2) + lh5.LH5Iterator(1, 2) def test_lgnd_waveform_table_fancy_idx(lgnd_file): - lh5_it = lh5.Iterator( + lh5_it = lh5.LH5Iterator( lgnd_file, "geds/raw/waveform", entry_list=[ @@ -97,13 +97,13 @@ def more_lgnd_files(lgnd_test_data): def test_friend(more_lgnd_files): - lh5_raw_it = lh5.Iterator( + lh5_raw_it = lh5.LH5Iterator( more_lgnd_files[0], "ch1084803/raw", field_mask=["waveform", "baseline"], buffer_len=5, ) - lh5_it = lh5.Iterator( + lh5_it = lh5.LH5Iterator( more_lgnd_files[1], "ch1084803/hit", field_mask=["is_valid_0vbb"], @@ -121,7 +121,7 @@ def test_friend(more_lgnd_files): def test_iterate(more_lgnd_files): # iterate through all hit groups in all files; there are 10 entries in # each group/file - lh5_it = lh5.Iterator( + lh5_it = lh5.LH5Iterator( more_lgnd_files[1] * 3, ["ch1084803/hit"] * 2 + ["ch1084804/hit"] * 2 + ["ch1121600/hit"] * 2, field_mask=["is_valid_0vbb", "timestamp", "zacEmax_ctc_cal"], diff --git a/tests/lh5/test_lh5_store.py b/tests/lh5/test_lh5_store.py index 4743dc0d..9d2d254c 100644 --- a/tests/lh5/test_lh5_store.py +++ b/tests/lh5/test_lh5_store.py @@ -20,11 +20,11 @@ def lgnd_file(lgnd_test_data): def test_init(): - lh5.Store() + lh5.LH5Store() def test_gimme_file(lgnd_file): - store = lh5.Store(keep_open=True) + store = lh5.LH5Store(keep_open=True) f = store.gimme_file(lgnd_file) assert isinstance(f, h5py.File) @@ -36,7 +36,7 @@ def test_gimme_file(lgnd_file): def test_gimme_group(lgnd_file, tmptestdir): f = h5py.File(lgnd_file) - store = lh5.Store() + store = lh5.LH5Store() g = store.gimme_group("/geds", f) assert isinstance(g, h5py.Group) @@ -86,7 +86,7 @@ def test_load_nda(lgnd_file): @pytest.fixture(scope="module") def lh5_file(tmptestdir): - store = lh5.Store() + store = lh5.LH5Store() struct = lgdo.Struct() struct.add_field("scalar", lgdo.Scalar(value=10, attrs={"sth": 1})) @@ -184,7 +184,7 @@ def test_write_objects(lh5_file): def test_read_n_rows(lh5_file): - store = 
lh5.Store() + store = lh5.LH5Store() assert store.read_n_rows("/data/struct_full/aoesa", lh5_file) == 5 assert store.read_n_rows("/data/struct_full/array", lh5_file) == 5 assert store.read_n_rows("/data/struct_full/scalar", lh5_file) is None @@ -196,13 +196,13 @@ def test_read_n_rows(lh5_file): def test_get_buffer(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() buf = store.get_buffer("/data/struct_full/wftable_enc", lh5_file) assert isinstance(buf.values, types.ArrayOfEqualSizedArrays) def test_read_scalar(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/scalar", lh5_file) assert isinstance(lh5_obj, lgdo.Scalar) assert lh5_obj.value == 10 @@ -213,7 +213,7 @@ def test_read_scalar(lh5_file): def test_read_array(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/array", lh5_file) assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda == np.array([2, 3, 4])).all() @@ -226,7 +226,7 @@ def test_read_array(lh5_file): def test_read_array_fancy_idx(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct_full/array", lh5_file, idx=[0, 3, 4]) assert isinstance(lh5_obj, types.Array) assert (lh5_obj.nda == np.array([1, 4, 5])).all() @@ -234,7 +234,7 @@ def test_read_array_fancy_idx(lh5_file): def test_read_vov(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/vov", lh5_file) assert isinstance(lh5_obj, types.VectorOfVectors) @@ -258,7 +258,7 @@ def test_read_vov(lh5_file): def test_read_vov_fancy_idx(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct_full/vov", lh5_file, idx=[0, 2]) assert isinstance(lh5_obj, types.VectorOfVectors) @@ -271,7 +271,7 @@ def test_read_vov_fancy_idx(lh5_file): def test_read_voev(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/voev", lh5_file, decompress=False) assert isinstance(lh5_obj, types.VectorOfEncodedVectors) @@ -301,7 +301,7 @@ def test_read_voev(lh5_file): def test_read_voev_fancy_idx(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read( "/data/struct_full/voev", lh5_file, idx=[0, 2], decompress=False ) @@ -316,14 +316,14 @@ def test_read_voev_fancy_idx(lh5_file): def test_read_aoesa(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/aoesa", lh5_file) assert isinstance(lh5_obj, types.ArrayOfEqualSizedArrays) assert (lh5_obj.nda == np.full((3, 5), fill_value=42)).all() def test_read_table(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) assert isinstance(lh5_obj, types.Table) assert n_rows == 3 @@ -335,7 +335,7 @@ def test_read_table(lh5_file): def test_read_hdf5_compressed_data(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/table", lh5_file) assert "compression" not in lh5_obj["b"].attrs @@ -351,7 +351,7 @@ def test_read_hdf5_compressed_data(lh5_file): def test_read_wftable(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct/wftable", lh5_file) assert isinstance(lh5_obj, types.WaveformTable) assert n_rows == 3 @@ -376,7 +376,7 @@ def test_read_wftable(lh5_file): def test_read_wftable_encoded(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = 
store.read("/data/struct/wftable_enc", lh5_file, decompress=False) assert isinstance(lh5_obj, types.WaveformTable) assert isinstance(lh5_obj.values, types.ArrayOfEncodedEqualSizedArrays) @@ -426,7 +426,7 @@ def test_read_wftable_encoded(lh5_file): def test_read_with_field_mask(lh5_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/data/struct_full", lh5_file, field_mask=["array"]) assert list(lh5_obj.keys()) == ["array"] @@ -455,7 +455,7 @@ def test_read_with_field_mask(lh5_file): def test_read_lgnd_array(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/geds/raw/baseline", lgnd_file) assert isinstance(lh5_obj, types.Array) @@ -467,7 +467,7 @@ def test_read_lgnd_array(lgnd_file): def test_read_lgnd_array_fancy_idx(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read( "/geds/raw/baseline", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] @@ -479,7 +479,7 @@ def test_read_lgnd_array_fancy_idx(lgnd_file): def test_read_lgnd_vov(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/geds/raw/tracelist", lgnd_file) assert isinstance(lh5_obj, types.VectorOfVectors) @@ -488,7 +488,7 @@ def test_read_lgnd_vov(lgnd_file): def test_read_lgnd_vov_fancy_idx(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read( "/geds/raw/tracelist", lgnd_file, idx=[2, 4, 6, 9, 11, 16, 68] @@ -501,7 +501,7 @@ def test_read_lgnd_vov_fancy_idx(lgnd_file): def test_read_array_concatenation(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/geds/raw/baseline", [lgnd_file, lgnd_file]) assert isinstance(lh5_obj, types.Array) assert n_rows == 200 @@ -509,7 +509,7 @@ def test_read_array_concatenation(lgnd_file): def test_read_lgnd_waveform_table(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read("/geds/raw/waveform", lgnd_file) assert isinstance(lh5_obj, types.WaveformTable) @@ -528,7 +528,7 @@ def test_read_lgnd_waveform_table(lgnd_file): def test_read_lgnd_waveform_table_fancy_idx(lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() lh5_obj, n_rows = store.read( "/geds/raw/waveform", @@ -541,7 +541,7 @@ def test_read_lgnd_waveform_table_fancy_idx(lgnd_file): @pytest.fixture(scope="module") def enc_lgnd_file(lgnd_file, tmptestdir): - store = lh5.Store() + store = lh5.LH5Store() wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) wft.values.attrs["compression"] = RadwareSigcompress(codec_shift=-32768) store.write( @@ -558,14 +558,14 @@ def test_write_compressed_lgnd_waveform_table(enc_lgnd_file): def test_read_compressed_lgnd_waveform_table(lgnd_file, enc_lgnd_file): - store = lh5.Store() + store = lh5.LH5Store() wft, _ = store.read("/geds/raw/waveform", enc_lgnd_file) assert isinstance(wft.values, types.ArrayOfEqualSizedArrays) assert "compression" not in wft.values.attrs def test_write_with_hdf5_compression(lgnd_file, tmptestdir): - store = lh5.Store() + store = lh5.LH5Store() wft, n_rows = store.read("/geds/raw/waveform", lgnd_file) store.write( wft, @@ -606,7 +606,7 @@ def test_write_object_overwrite_table_no_deletion(caplog, tmptestdir): tb2 = types.Table( col_dict={"dset1": types.Array(np.ones(10))} ) # Same field name, different values - store = lh5.Store() + store = lh5.LH5Store() store.write(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") store.write( tb2, @@ -637,7 +637,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): 
tb2 = types.Table( col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = lh5.Store() + store = lh5.LH5Store() store.write(tb1, "my_group", f"{tmptestdir}/write_object_overwrite_test.lh5") store.write( tb2, @@ -662,7 +662,7 @@ def test_write_object_overwrite_table_with_deletion(caplog, tmptestdir): tb2 = types.Table( col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = lh5.Store() + store = lh5.LH5Store() store.write( tb1, "my_table", @@ -713,7 +713,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): values=np.ones((10, 10)), values_units="ADC", ) # Same field name, different values - store = lh5.Store() + store = lh5.LH5Store() store.write( tb1, "my_table", @@ -742,7 +742,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): # Now try overwriting an array, and test the write_start argument array1 = types.Array(nda=np.zeros(10)) array2 = types.Array(nda=np.ones(20)) - store = lh5.Store() + store = lh5.LH5Store() store.write(array1, "my_array", f"{tmptestdir}/write_object_overwrite_test.lh5") store.write( array2, @@ -763,7 +763,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): # Now try overwriting a scalar scalar1 = types.Scalar(0) scalar2 = types.Scalar(1) - store = lh5.Store() + store = lh5.LH5Store() store.write(scalar1, "my_scalar", f"{tmptestdir}/write_object_overwrite_test.lh5") store.write( scalar2, @@ -782,7 +782,7 @@ def test_write_object_overwrite_lgdo(caplog, tmptestdir): # Finally, try overwriting a vector of vectors vov1 = types.VectorOfVectors(listoflists=[np.zeros(1), np.ones(2), np.zeros(3)]) vov2 = types.VectorOfVectors(listoflists=[np.ones(1), np.zeros(2), np.ones(3)]) - store = lh5.Store() + store = lh5.LH5Store() store.write(vov1, "my_vector", f"{tmptestdir}/write_object_overwrite_test.lh5") store.write( vov2, @@ -808,7 +808,7 @@ def test_write_object_append_column(tmptestdir): array1 = types.Array(np.zeros(10)) tb1 = types.Table(col_dict={"dset1`": types.Array(np.ones(10))}) - store = lh5.Store() + store = lh5.LH5Store() store.write(array1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(RuntimeError) as exc_info: store.write( @@ -836,7 +836,7 @@ def test_write_object_append_column(tmptestdir): tb2 = types.Table( col_dict={"dset2": types.Array(np.ones(10))} ) # Same field name, different values - store = lh5.Store() + store = lh5.LH5Store() store.write(tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(ValueError) as exc_info: store.write( @@ -860,7 +860,7 @@ def test_write_object_append_column(tmptestdir): tb2 = types.Table( col_dict={"dset2": types.Array(np.ones(20))} ) # different field name, different size - store = lh5.Store() + store = lh5.LH5Store() store.write(tb1, "my_table", f"{tmptestdir}/write_object_append_column_test.lh5") with pytest.raises(ValueError) as exc_info: store.write( @@ -884,7 +884,7 @@ def test_write_object_append_column(tmptestdir): tb2 = types.Table( col_dict={"dset2": types.Array(np.ones(10))} ) # different field name, different size - store = lh5.Store() + store = lh5.LH5Store() store.write( tb1, "my_table", From 9a05121d8e078f72596fcc1e589b8a9a7639025c Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 12:35:26 +0100 Subject: [PATCH 10/13] Renamed `Store` and `Iterator` back to `LH5Store` and `LH5Iterator` --- src/lgdo/lgdo_utils.py | 28 ++++++++++++++++------------ src/lgdo/lh5_store.py | 34 ++++++++++++++++++----------------- 2 files changed, 34 insertions(+), 28 deletions(-) diff
--git a/src/lgdo/lgdo_utils.py b/src/lgdo/lgdo_utils.py index 14eeee1b..d9044678 100644 --- a/src/lgdo/lgdo_utils.py +++ b/src/lgdo/lgdo_utils.py @@ -19,9 +19,10 @@ def copy(obj: lgdo.LGDO, dtype: np.dtype = None) -> None: def get_element_type(obj: object) -> str: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " - "lgdo.lh5_store will be removed in a future release.", + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import get_element_type' with 'from lgdo.utils import get_element_type'." + "'lgdo.lgdo_utils' will be removed in a future release.", DeprecationWarning, stacklevel=2, ) @@ -30,9 +31,10 @@ def get_element_type(obj: object) -> str: def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str]]: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " - "lgdo.lh5_store will be removed in a future release.", + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import parse_datatype' with 'from lgdo.utils import parse_datatype'." + "'lgdo.lgdo_utils' will be removed in a future release.", DeprecationWarning, stacklevel=2, ) @@ -41,9 +43,10 @@ def parse_datatype(datatype: str) -> tuple[str, tuple[int, ...], str | list[str] def expand_vars(expr: str, substitute: dict[str, str] = None) -> str: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " - "lgdo.lh5_store will be removed in a future release.", + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import expand_vars' with 'from lgdo.utils import expand_vars'." + "'lgdo.lgdo_utils' will be removed in a future release.", DeprecationWarning, stacklevel=2, ) @@ -57,9 +60,10 @@ def expand_path( base_path: str = None, ) -> str | list: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store and LH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " - "lgdo.lh5_store will be removed in a future release.", + "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " + "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " + "or 'from lgdo.lgdo_utils import expand_path' with 'from lgdo.utils import expand_path'." + "'lgdo.lgdo_utils' will be removed in a future release.", DeprecationWarning, stacklevel=2, ) diff --git a/src/lgdo/lh5_store.py b/src/lgdo/lh5_store.py index aee38b94..ce8b72cd 100644 --- a/src/lgdo/lh5_store.py +++ b/src/lgdo/lh5_store.py @@ -38,8 +38,8 @@ def __init__( friend: Iterator = None, ) -> None: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator." - "We recommend using 'import lgdo.lh5 as lh5' and then creating anLH5Iterator instance via 'lh5.LH5Iterator()'." + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator." + "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." 
"lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, @@ -68,9 +68,10 @@ def write_object( **h5py_kwargs, ) -> None: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. " "The object you are calling this function from uses the old LH5Iterator class." - "We recommend using 'import lgdo.lh5 as lh5' and then creating an instance via 'lh5.LH5Iterator()'.", + "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." + "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, ) @@ -99,9 +100,10 @@ def read_object( decompress: bool = True, ) -> tuple[LGDO, int]: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Iterator. " "The object you are calling this function from uses the old LH5Iterator class." - "We recommend using 'import lgdo.lh5 as lh5' and then creating a LH5Store instance via 'lh5.LH5Store()'.", + "Please replace 'from lgdo.lh5_store import LH5Iterator' with 'from lgdo.lh5 import LH5Iterator'." + "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, ) @@ -121,8 +123,8 @@ def read_object( class LH5Store(lh5.LH5Store): def __init__(self, base_path: str = "", keep_open: bool = False): warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " - "We recommend using 'import lgdo.lh5 as lh5' and then creating an instance via 'lh5.LH5Iterator()'.", + "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store. " + "Please replace 'from lgdo.lh5_store import LH5Store' with 'from lgdo.lh5 import LH5Store'." "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, @@ -137,8 +139,8 @@ def load_dfs( idx_list: list[np.ndarray | list | tuple] = None, ) -> pd.DataFrame: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import load_dfs' with 'from lgdo.lh5 import load_dfs'. " "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, @@ -153,8 +155,8 @@ def load_nda( idx_list: list[np.ndarray | list | tuple] = None, ) -> dict[str, np.ndarray]: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import load_nda' with 'from lgdo.lh5 import load_nda'. " "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, @@ -164,8 +166,8 @@ def load_nda( def ls(lh5_file: str | h5py.Group, lh5_group: str = "") -> list[str]: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import ls' with 'from lgdo.lh5 import ls'. 
" "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, @@ -181,8 +183,8 @@ def show( header: bool = True, ) -> None: warn( - "lgdo.lh5_store has moved to a subfolder lgdo.lh5 containing LH5Store andLH5Iterator. " - "Please replace 'import lgdo.lh5_store' with 'import lgdo.lh5'. " + "lgdo.lh5_store has moved to a subfolder lgdo.lh5. " + "Please replace 'from lgdo.lh5_store import show' with 'from lgdo.lh5 import show'. " "lgdo.lh5_store will be removed in a future release.", DeprecationWarning, stacklevel=2, From 6df838c63802c401c459faf2ebe41ca389350b28 Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 12:35:56 +0100 Subject: [PATCH 11/13] Adjusted warning messages to be individual for each function. --- src/lgdo/lgdo_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lgdo/lgdo_utils.py b/src/lgdo/lgdo_utils.py index d9044678..cddd2111 100644 --- a/src/lgdo/lgdo_utils.py +++ b/src/lgdo/lgdo_utils.py @@ -63,7 +63,7 @@ def expand_path( "'lgdo.lgdo_utils' has been renamed to 'lgdo.utils'. " "Please replace either 'import lgdo.lgdo_utils as utils' with 'import lgdo.utils as utils' " "or 'from lgdo.lgdo_utils import expand_path' with 'from lgdo.utils import expand_path'." - "'lgdo.lgdo_utils' will be removed in a future release.", + "'lgdo.lgdo_utils' will be removed in a future release. ", DeprecationWarning, stacklevel=2, ) From 72cd1a36fc9994c7b7c7fcf3f1da075ef809c3a5 Mon Sep 17 00:00:00 2001 From: Neuberger Date: Fri, 24 Nov 2023 12:37:55 +0100 Subject: [PATCH 12/13] Fixed docs --- docs/source/notebooks/DataCompression.ipynb | 2 +- src/lgdo/cli.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/notebooks/DataCompression.ipynb b/docs/source/notebooks/DataCompression.ipynb index 36b69b1b..74a26c92 100644 --- a/docs/source/notebooks/DataCompression.ipynb +++ b/docs/source/notebooks/DataCompression.ipynb @@ -61,7 +61,7 @@ "metadata": {}, "outputs": [], "source": [ - "store = lgdo.lh5.LH5Storeoreore()\n", + "store = lgdo.lh5.LH5Store()\n", "store.write(data, \"data\", \"data.lh5\", wo_mode=\"of\")\n", "lgdo.show(\"data.lh5\")" ] diff --git a/src/lgdo/cli.py b/src/lgdo/cli.py index 24ba56d1..2273579a 100644 --- a/src/lgdo/cli.py +++ b/src/lgdo/cli.py @@ -9,7 +9,7 @@ def lh5ls(): - """:func:`.show` command line interface.""" + """:func:`.lh5.show` command line interface.""" parser = argparse.ArgumentParser( prog="lh5ls", description="Inspect LEGEND HDF5 (LH5) file contents" ) From ff8d6279b2ea6a1ee778bfb58ef3561f2d660959 Mon Sep 17 00:00:00 2001 From: Moritz Neuberger <31659079+MoritzNeuberger@users.noreply.github.com> Date: Fri, 24 Nov 2023 14:25:51 +0100 Subject: [PATCH 13/13] Apply suggestions from code review Co-authored-by: Luigi Pertoldi --- docs/source/extensions/numbadoc.py | 2 +- docs/source/notebooks/LH5Files.ipynb | 2 +- src/lgdo/compression/radware.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/extensions/numbadoc.py b/docs/source/extensions/numbadoc.py index a49b4a90..06de3418 100644 --- a/docs/source/extensions/numbadoc.py +++ b/docs/source/extensions/numbadoc.py @@ -27,7 +27,7 @@ def import_object(self) -> bool: """ success = super().import_object() if success: - # LH5Store away numba wrapper + # store away numba wrapper self.jitobj = self.object # And bend references to underlying python function if hasattr(self.object, "py_func"): diff --git a/docs/source/notebooks/LH5Files.ipynb b/docs/source/notebooks/LH5Files.ipynb index 
12383d1e..9c594be9 100644 --- a/docs/source/notebooks/LH5Files.ipynb +++ b/docs/source/notebooks/LH5Files.ipynb @@ -200,7 +200,7 @@ "id": "b3f52d77", "metadata": {}, "source": [ - "As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5_store.Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:" + "As you might have noticed, `read_object()` loads all the requested data in memory at once. This can be a problem when dealing with large datasets. `LH5Iterator` [[docs]](https://legend-pydataobj.readthedocs.io/en/stable/api/lgdo.html#lgdo.lh5.iterator.LH5Iterator) makes it possible to handle data one chunk at a time (sequentially) to avoid running out of memory:" ] }, { diff --git a/src/lgdo/compression/radware.py b/src/lgdo/compression/radware.py index 58bce975..0332dfb7 100644 --- a/src/lgdo/compression/radware.py +++ b/src/lgdo/compression/radware.py @@ -387,7 +387,7 @@ def _radware_sigcompress_encode( changes: - Shift the input signal by `shift` before encoding. - - LH5Store encoded, :class:`numpy.uint16` signal as an array of bytes + - Store encoded, :class:`numpy.uint16` signal as an array of bytes (:class:`numpy.ubyte`), in big-endian ordering. - Declare mask globally to avoid extra memory allocation. - Enable hardware-vectorization with Numba (:func:`numba.guvectorize`).
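
Taken together, the series leaves lgdo.lh5 as the canonical home of LH5Store and LH5Iterator (with read()/write() superseding the read_object()/write_object() spellings), while lgdo.lh5_store and lgdo.lgdo_utils survive only as deprecation shims. Below is a minimal migration sketch, assuming the post-series package layout; the file name "file.lh5" and its "/geds/raw/waveform" table are illustrative stand-ins, not shipped test data:

    import warnings

    from lgdo import lh5

    # new-style spelling: LH5Store now lives in the lgdo.lh5 subpackage,
    # and read() supersedes the old read_object() name
    store = lh5.LH5Store()
    wft, n_rows = store.read("/geds/raw/waveform", "file.lh5")  # hypothetical file

    # old-style spelling: still importable, but the shim class in
    # lgdo.lh5_store warns from __init__() before delegating to lgdo.lh5
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        from lgdo.lh5_store import LH5Store  # deprecated path

        LH5Store()

    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

The shim pattern used throughout the patches (subclass or thin wrapper plus warn(..., DeprecationWarning, stacklevel=2)) keeps old call sites working for one more release cycle while pointing users at the exact replacement import.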