From 229c3dae8db8297f9bb2fa4ad5146f399244e83b Mon Sep 17 00:00:00 2001 From: David Stuebe Date: Fri, 1 Dec 2023 17:38:03 +0000 Subject: [PATCH] Decode all stored bytes as string. Assert ref spec version 1. --- kerchunk/grib2.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/kerchunk/grib2.py b/kerchunk/grib2.py index c6d1f410..27a051fb 100644 --- a/kerchunk/grib2.py +++ b/kerchunk/grib2.py @@ -396,15 +396,13 @@ def grib_tree( # TODO allow passing a LazyReferenceMapper as output? zarr_store = {} zroot = zarr.open_group(store=zarr_store) - result = dict(refs=zarr_store) aggregations: Dict[str, List] = defaultdict(list) aggregation_dims: Dict[str, Set] = defaultdict(set) unknown_counter = 0 for msg_ind, group in enumerate(message_groups): - if "version" not in result: - result["version"] = group["version"] + assert group["version"] == 1 gattrs = ujson.loads(group["refs"][".zattrs"]) coordinates = gattrs["coordinates"].split(" ") @@ -518,6 +516,16 @@ def grib_tree( if key not in [".zattrs", ".zgroup"]: zarr_store[f"{path}/{key}"] = value + # Force all stored values to decode as string, not bytes. String should be correct. + # ujson will reject bytes values by default. + # Using 'reject_bytes=False' on write would fail an equality check on read. + zarr_store = { + key: (val.decode() if isinstance(val, bytes) else val) + for key, val in zarr_store.items() + } + # TODO handle other kerchunk reference spec versions? + result = dict(refs=zarr_store, version=1) + return result