From aebb80fa1df998de1013b4f6cf620ad62dca17d5 Mon Sep 17 00:00:00 2001 From: Ethan-DeBandi99 <16845933+Ethan-DeBandi99@users.noreply.github.com> Date: Fri, 18 Aug 2023 18:34:14 -0400 Subject: [PATCH] Closes #2690 - `ak.load_all` glob expression update (#2692) * Updating ak.load to support desired behavior * Flake8 fix * Fixing segarray read --- arkouda/io.py | 8 ++++++-- arkouda/segarray.py | 4 ++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arkouda/io.py b/arkouda/io.py index d15fc10767..d9eff2befc 100644 --- a/arkouda/io.py +++ b/arkouda/io.py @@ -1572,7 +1572,8 @@ def load( with a .parquet appended to the prefix_path. Parquet files were previously ALWAYS stored with a ``.parquet`` extension. - This function will be deprecated when glob flags are added to read_* functions + ak.load does not support loading a single file. + For loading single HDF5 files without the _LOCALE#### suffix please use ak.read(). CSV files without the Arkouda Header are not supported. @@ -1589,8 +1590,11 @@ def load( #### is replaced by each locale numbers. Because filetype is inferred during processing, the extension is not required to be a specific format. """ + if "*" in path_prefix: + raise ValueError("Glob expressions not supported by ak.load(). "
+ "To read files using a glob expression, please use ak.read()") prefix, extension = os.path.splitext(path_prefix) - globstr = f"{prefix}*{extension}" + globstr = f"{prefix}_LOCALE*{extension}" try: file_format = get_filetype(globstr) if file_format.lower() == "infer" else file_format if file_format.lower() == "hdf5": diff --git a/arkouda/segarray.py b/arkouda/segarray.py index eafdae1145..f1b0bce010 100644 --- a/arkouda/segarray.py +++ b/arkouda/segarray.py @@ -1126,9 +1126,9 @@ def read_hdf(cls, prefix_path, dataset="segarray"): ------- SegArray """ - from arkouda.io import load + from arkouda.io import read_hdf - return load(prefix_path, dataset=dataset) + return read_hdf(prefix_path, datasets=dataset) @classmethod def load(cls, prefix_path, dataset="segarray", segment_name="segments", value_name="values"):