From dd13033f29979c1e672d096c1426cb321ae7833d Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Thu, 22 Feb 2024 10:45:20 +0000 Subject: [PATCH 1/5] Fixing the master (moved play to branch) --- bnl/playing.py | 27 --------------------------- 1 file changed, 27 deletions(-) delete mode 100644 bnl/playing.py diff --git a/bnl/playing.py b/bnl/playing.py deleted file mode 100644 index ebd6b01..0000000 --- a/bnl/playing.py +++ /dev/null @@ -1,27 +0,0 @@ -import pyfive -from pathlib import Path -from pyfive.as_dataobjects import ADataObjects - -MYFILE = 'da193o_25_day__grid_T_198807-198807.nc' -MYFILE = '../tests/chunked.hdf5' -MYPATH = Path(__file__).parent - -#f = h5py.File(MYPATH/MYFILE,'r') -f2 = pyfive.File(MYPATH/MYFILE) -path = 'dataset1' -link_target = f2._links[path] -dsref = ADataObjects(f2.file._fh, link_target) -chunk_index = dsref.get_offset_addresses() -print(chunk_index) - - -#v='tos' -#tos =f2[v] -#v='dataset1' -#print(tos) -#x = tos[2,:] -#print(x) -#print(tos.shape) - - - From 719a2c8ac7c030cd419f2e2ec80a05ec5e2c5d3f Mon Sep 17 00:00:00 2001 From: Bryan Lawrence Date: Thu, 22 Feb 2024 10:51:23 +0000 Subject: [PATCH 2/5] This shouldn't be on the main right now (but it's still on the play branch). --- pyfive/as_dataobjects.py | 67 ---------------------------------------- 1 file changed, 67 deletions(-) delete mode 100644 pyfive/as_dataobjects.py diff --git a/pyfive/as_dataobjects.py b/pyfive/as_dataobjects.py deleted file mode 100644 index b811fb3..0000000 --- a/pyfive/as_dataobjects.py +++ /dev/null @@ -1,67 +0,0 @@ -from .dataobjects import DataObjects, DATA_STORAGE_MSG_TYPE -from .datatype_msg import DatatypeMessage -import numpy as np -from .btree import BTreeV1RawDataChunks - -class ADataObjects(DataObjects): - """ - Subclass of DataObjets which access the chunk addresses for a given slice of data - """ - def __init__(self,*args,**kwargs): - """ - Initialise via super class - """ - super().__init__(*args,**kwargs) - - # not yet sure we need our own copy - self._as_chunk_index=[] - - def get_offset_addresses(self, args=None): - """ - Get the offset addresses for the data requested - """ - - # offset and size from data storage message - msg = self.find_msg_type(DATA_STORAGE_MSG_TYPE)[0] - msg_offset = msg['offset_to_message'] - version, dims, layout_class, property_offset = ( - self._get_data_message_properties(msg_offset)) - - if layout_class == 0: # compact storage - raise NotImplementedError("Compact storage") - elif layout_class == 1: # contiguous storage - return NotImplementedError("Contiguous storage") - if layout_class == 2: # chunked storage - return self._as_get_chunk_addresses(args) - - - def _as_get_chunk_addresses(self, args): - """ - Get the offset addresses associated with all the chunks - known to the b-tree of this object - """ - self._get_chunk_params() - - if self._as_chunk_index == []: - chunk_btree = BTreeV1RawDataChunks( - self.fh, self._chunk_address, self._chunk_dims) - - count = np.prod(self.shape) - itemsize = np.dtype(self.dtype).itemsize - chunk_buffer_size = count * itemsize - - for node in chunk_btree.all_nodes[0]: - for node_key, addr in zip(node['keys'], node['addresses']): - size = chunk_buffer_size - if self.filter_pipeline: - size = node_key['chunk_size'] - start = node_key['chunk_offset'][:-1] - region = [slice(i, i+j) for i, j in zip(start, self.shape)] - self._as_chunk_index.append([region, start, size]) - - if args is not None: - return NotImplementedError - return self._as_chunk_index - - - From 011dedd99201d044b687fbe0f431ecdbc1d625e7 Mon Sep 17 00:00:00 2001 From: bbm Date: Thu, 22 Feb 2024 16:49:06 -0500 Subject: [PATCH 3/5] retrieving maxshape from dataobjects and exposing maxshape property on Dataset --- pyfive/dataobjects.py | 21 +++++++++++++++++++-- pyfive/high_level.py | 5 +++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pyfive/dataobjects.py b/pyfive/dataobjects.py index 886b48f..7be5d80 100644 --- a/pyfive/dataobjects.py +++ b/pyfive/dataobjects.py @@ -246,7 +246,16 @@ def shape(self): """ Shape of the dataset. """ msg = self.find_msg_type(DATASPACE_MSG_TYPE)[0] msg_offset = msg['offset_to_message'] - return determine_data_shape(self.msg_data, msg_offset) + shape, maxshape = determine_data_shape(self.msg_data, msg_offset) + return shape + + @property + def maxshape(self): + """ Maximum Shape of the dataset. (None for unlimited dimension) """ + msg = self.find_msg_type(DATASPACE_MSG_TYPE)[0] + msg_offset = msg['offset_to_message'] + shape, maxshape = determine_data_shape(self.msg_data, msg_offset) + return maxshape @property def fillvalue(self): @@ -648,6 +657,8 @@ def is_dataset(self): """ True when DataObjects points to a dataset, False for a group. """ return len(self.find_msg_type(DATASPACE_MSG_TYPE)) > 0 +UNLIMITED_SIZE = struct.unpack(' Date: Thu, 22 Feb 2024 16:37:22 -0600 Subject: [PATCH 4/5] add 3.11 and 3.12 to test matrix --- .github/workflows/pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 3a382d5..6081f94 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 From e933c466eea79331a34a64e9616e88703ba21a9b Mon Sep 17 00:00:00 2001 From: bbm Date: Tue, 27 Feb 2024 15:08:15 -0500 Subject: [PATCH 5/5] fix usage of determine_data_shape, and set UNLIMITED_SIZE constant from existing UNDEFINED_ADDRESS, as they have the same value --- pyfive/dataobjects.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pyfive/dataobjects.py b/pyfive/dataobjects.py index 7be5d80..d2c18cc 100644 --- a/pyfive/dataobjects.py +++ b/pyfive/dataobjects.py @@ -19,6 +19,9 @@ from .btree import GZIP_DEFLATE_FILTER, SHUFFLE_FILTER, FLETCH32_FILTER from .misc_low_level import Heap, SymbolTable, GlobalHeap, FractalHeap +# these constants happen to have the same value... +UNLIMITED_SIZE = UNDEFINED_ADDRESS + class DataObjects(object): """ @@ -180,7 +183,7 @@ def unpack_attribute(self, offset): offset += _padded_size(attr_dict['datatype_size'], padding_multiple) # read in the dataspace information - shape = determine_data_shape(self.msg_data, offset) + shape, maxshape = determine_data_shape(self.msg_data, offset) items = int(np.product(shape)) offset += _padded_size(attr_dict['dataspace_size'], padding_multiple) @@ -657,8 +660,6 @@ def is_dataset(self): """ True when DataObjects points to a dataset, False for a group. """ return len(self.find_msg_type(DATASPACE_MSG_TYPE)) > 0 -UNLIMITED_SIZE = struct.unpack('