Commit: added h5json_writer

jreadey committed Feb 23, 2025
1 parent 2f546b9 commit 4b9cb68
Showing 9 changed files with 719 additions and 33 deletions.
2 changes: 1 addition & 1 deletion src/h5json/dset_util.py
@@ -94,7 +94,7 @@ def make_new_dset(


# TBD - other properties
dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl}
dset_json = {"shape": shape_json, "type": type_json, "cpl": cpl, "attributes": {}}
dset_json["created"] = time.time()
dset_json["modified"] = None

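For reference, a sketch of the dataset skeleton this one-line change produces; the shape and type values below are illustrative, not taken from the commit:

# hypothetical make_new_dset result for a small integer dataset
dset_json = {
    "shape": {"class": "H5S_SIMPLE", "dims": [10]},
    "type": {"class": "H5T_INTEGER", "base": "H5T_STD_I32LE"},
    "cpl": {},
    "attributes": {},  # new in this commit: datasets start with an empty attributes dict
}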
256 changes: 256 additions & 0 deletions src/h5json/h5json_writer.py
@@ -0,0 +1,256 @@
##############################################################################
# Copyright by The HDF Group. #
# All rights reserved. #
# #
# This file is part of H5Serv (HDF5 REST Server) Service, Libraries and #
# Utilities. The full HDF5 REST Server copyright notice, including #
# terms governing use, modification, and redistribution, is contained in #
# the file COPYING, which can be found at the root of the source code #
# distribution tree. If you do not have access to this file, you may #
# request a copy from help@hdfgroup.org. #
##############################################################################

import json

from .h5writer import H5Writer
from .objid import stripId, getCollectionForId

class H5JsonWriter(H5Writer):
    """
    Writer class that serializes the contents of an Hdf5db instance to an
    HDF5-JSON formatted document.
    """

def __init__(
self,
filepath,
append=False,
no_data=False,
app_logger=None
):
super().__init__(filepath, append=append, app_logger=app_logger)
self.alias_db = {}
self.json = {}
self._no_data = no_data
self._root_uuid = None

def flush(self):
""" Write dirty items """
        # the json writer doesn't support incremental updates, so wait
        # for close() to write out the database
self.log.info("flush")

def close(self):
""" close storage handle """
self.dumpFile()


def _setAlias(self, obj_id, id_set, h5path):
""" add the given h5path to the object's alias list
If the object is a group, recurse through each hard link """
obj_json = self.db.getObjectById(obj_id)
alias_list = self.alias_db[obj_id]
if h5path in alias_list:
return # nothing to do
alias_list.append(h5path)
if getCollectionForId(obj_id) != "groups":
return # done
id_set.add(obj_id) # keep track of objects we've visited to avoid loops
links = obj_json["links"]
if h5path[-1] != '/':
h5path += '/'

for link_name in links:
link_json = links[link_name]
if link_json["class"] == "H5L_TYPE_HARD":
tgt_id = link_json["id"]
if tgt_id in id_set:
self.log.info(f"_setAlias - circular loop found")
else:
self._setAlias(tgt_id, id_set, h5path+link_name)
id_set.remove(obj_id)
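        # Example: if a group's hard link points back to an ancestor (a
        # cycle), the id_set check above logs the loop and skips the
        # recursive call; removing obj_id on the way out keeps other,
        # non-cyclic paths to the same object discoverable.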

def getAliasList(self):
""" update the alias list for each object """
        # clear existing aliases
obj_ids = self.db.getCollection()
for obj_id in obj_ids:
self.alias_db[obj_id] = []

self._setAlias(self._root_uuid, set(), "/")


def dumpAttribute(self, obj_id, attr_name):
self.log.info(f"dumpAttribute: [{attr_name}]")
item = self.db.getAttribute(obj_id, attr_name)
response = {"name": attr_name}
response["type"] = item["type"]
response["shape"] = item["shape"]
        if "value" not in item:
            self.log.warning("no value key in attribute: " + attr_name)
        else:
            response["value"] = item["value"]
return response

def dumpAttributes(self, obj_id):
attrs = self.db.getAttributes(obj_id)
self.log.info(f"dumpAttributes: {obj_id}")
items = []
for attr_name in attrs:
item = self.dumpAttribute(obj_id, attr_name)
items.append(item)

return items

def dumpLink(self, obj_id, name):
item = self.db.getLink(obj_id, name)
response = {"class": item["class"]}
if "id" in item:
tgt_id = item["id"]
response["collection"] = getCollectionForId(tgt_id)
response["id"] = stripId(tgt_id)

for key in item:
if key in ("id", "created", "modified"):
continue
response[key] = item[key]
response["title"] = name
return response

def dumpLinks(self, obj_id):
links = self.db.getLinks(obj_id)
items = []
for link_name in links:
item = self.dumpLink(obj_id, link_name)
items.append(item)
return items

def dumpGroup(self, obj_id):
item = self.db.getObjectById(obj_id)
response = {}
alias = self.alias_db[obj_id]
response["alias"] = alias

if "cpl" in item:
item["creationProperties"] = item["cpl"]
attributes = self.dumpAttributes(obj_id)
if attributes:
response["attributes"] = attributes
links = self.dumpLinks(obj_id)
if links:
response["links"] = links
return response

def dumpGroups(self):
groups = {}
item = self.dumpGroup(self._root_uuid)
root_uuid = stripId(self._root_uuid)
groups[root_uuid] = item
obj_ids = self.db.getCollection("groups")
for obj_id in obj_ids:
if obj_id == self._root_uuid:
continue
item = self.dumpGroup(obj_id)
obj_uuid = stripId(obj_id)
groups[obj_uuid] = item

self.json["groups"] = groups

def dumpDataset(self, obj_id):
response = {}
self.log.info("dumpDataset: " + obj_id)
item = self.db.getObjectById(obj_id)
if "alias" in item:
alias = item["alias"]
if alias:
self.log.info(f"dumpDataset alias: [{alias[0]}]")
response["alias"] = item["alias"]

response["type"] = item["type"]
shapeItem = item["shape"]
shape_rsp = {}
num_elements = 1
shape_rsp["class"] = shapeItem["class"]
if "dims" in shapeItem:
shape_rsp["dims"] = shapeItem["dims"]
for dim in shapeItem["dims"]:
num_elements *= dim
if "maxdims" in shapeItem:
maxdims = []
for dim in shapeItem["maxdims"]:
if dim == 0:
maxdims.append("H5S_UNLIMITED")
else:
maxdims.append(dim)
shape_rsp["maxdims"] = maxdims
response["shape"] = shape_rsp

if "cpl" in item:
response["creationProperties"] = item["cpl"]

attributes = self.dumpAttributes(obj_id)
if attributes:
response["attributes"] = attributes

if not self._no_data:
if num_elements > 0:
value = self.db.getDatasetValues(obj_id)
response["value"] = value # dump values unless header flag was passed
else:
response["value"] = [] # empty list
return response

def dumpDatasets(self):
obj_ids = self.db.getCollection("datasets")
if obj_ids:
datasets = {}
for obj_id in obj_ids:
item = self.dumpDataset(obj_id)
                datasets[stripId(obj_id)] = item  # key by bare uuid, as in dumpGroups

self.json["datasets"] = datasets

def dumpDatatype(self, obj_id):
response = {}
item = self.db.getObjectById(obj_id)
response["alias"] = item["alias"]
response["type"] = item["type"]
if "cpl" in item:
response["creationProperties"] = item["cpl"]
attributes = self.dumpAttributes(obj_id)
if attributes:
response["attributes"] = attributes
return response

def dumpDatatypes(self):
obj_ids = self.db.getCollection("datatypes")
if obj_ids:
datatypes = {}
for obj_id in obj_ids:
item = self.dumpDatatype(obj_id)
                datatypes[stripId(obj_id)] = item  # key by bare uuid, as in dumpGroups

self.json["datatypes"] = datatypes


def dumpFile(self):
self._root_uuid = self.db.getObjectIdByPath("/")

db_version_info = self.db.getVersionInfo()

self.json["apiVersion"] = db_version_info["hdf5-json-version"]
self.json["root"] = stripId(self._root_uuid)
self.getAliasList() # create alias_db with obj_id to alias list dict
self.dumpGroups()

self.dumpDatasets()

self.dumpDatatypes()

        # TBD - write to the output file; for now the JSON text goes to stdout
        print(json.dumps(self.json, sort_keys=True, indent=4))
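
For context, a usage sketch for this writer: it reads everything through its db attribute and emits the document from close(). How db gets attached isn't shown in this commit, so the wiring below is hypothetical:

from h5json.h5json_writer import H5JsonWriter

# db is assumed to be an Hdf5db-style object exposing getCollection(),
# getObjectById(), getAttributes(), getLinks(), getDatasetValues(), etc.
writer = H5JsonWriter("out.json", no_data=True)  # no_data=True skips dataset values
writer.db = db        # hypothetical wiring
writer.close()        # close() calls dumpFile(), which emits the JSON document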



15 changes: 8 additions & 7 deletions src/h5json/h5py_reader.py
@@ -9,8 +9,6 @@
# distribution tree. If you do not have access to this file, you may #
# request a copy from help@hdfgroup.org. #
##############################################################################
-import logging
-
import h5py
import numpy as np

@@ -42,13 +40,16 @@ def __init__(
filepath,
app_logger=None
):
self._id_map = {}
self._addr_map = {}
"""
if app_logger:
self.log = app_logger
else:
self.log = logging.getLogger()
self._id_map = {}
self._addr_map = {}
self._filepath = filepath
"""
super().__init__(filepath, app_logger=app_logger)
f = h5py.File(self._filepath)
self._f = f
self._root_id = createObjId(obj_type="groups")
@@ -182,7 +183,7 @@ def _getLinks(self, grp):
return items

def _getGroup(self, grp, include_links=True):
self.log.info("_getGroup alias: [{grp.name}]")
self.log.info(f"_getGroup alias: [{grp.name}]")

item = {"alias": grp.name}

@@ -192,15 +193,15 @@ def _getDatatype(self, ctype, include_attrs=True):
return item

def _getDatatype(self, ctype, include_attrs=True):
self.log.info("getDatatype alias: ]{ctype.name}")
self.log.info(f"getDatatype alias: ]{ctype.name}")
item = {"alias": ctype.name}
item["type"] = getTypeItem(ctype.dtype)

return item


def _getDataset(self, dset):
self.log.info("getDataset alias: [{dset.name}]")
self.log.info(f"getDataset alias: [{dset.name}]")

item = {"alias": dset.name}

9 changes: 7 additions & 2 deletions src/h5json/h5reader.py
@@ -11,7 +11,7 @@
##############################################################################
from abc import ABC, abstractmethod


import logging


class H5Reader(ABC):
@@ -23,9 +23,14 @@ class H5Reader(ABC):

def __init__(
self,
-        filepath
+        filepath,
+        app_logger=None
):
self._filepath = filepath
if app_logger:
self.log = app_logger
else:
self.log = logging.getLogger()

@abstractmethod
def get_root_id(self):
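With this change, a concrete reader picks up its logger from the base class rather than configuring logging itself. A minimal sketch of the pattern; the subclass is hypothetical, and any abstract methods beyond the excerpt above are omitted:

from h5json.h5reader import H5Reader

class DummyReader(H5Reader):
    # illustration only: a do-nothing concrete reader
    def __init__(self, filepath, app_logger=None):
        super().__init__(filepath, app_logger=app_logger)
        self.log.info("reader created for: " + self._filepath)  # self.log set by the base class

    def get_root_id(self):
        return None  # stub for the abstract method shown above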
