Skip to content

Commit

Permalink
This version appears to now support failing over from a memory map to…
Browse files Browse the repository at this point in the history
… a pseudo chunked read. Lots of things to do around optimising that read, but let's test this more widely first.
  • Loading branch information
Bryan Lawrence committed Mar 7, 2024
1 parent e6217b5 commit 67c93e0
Showing 1 changed file with 12 additions and 15 deletions.
27 changes: 12 additions & 15 deletions pyfive/dataobjects.py
Original file line number Diff line number Diff line change
Expand Up @@ -617,10 +617,10 @@ def __init__(self,*args,**kwargs):
# pseudo chunk block size: the size below which we don't bother
# pseudo chunking contiguous data and instead load all of it at data
# access time. Units are kibibytes.
self.pseudo_chunking = False
self.pseudo_chunking = True
self.pseudo_block_size_kib = 0
# We can't use mmaps on S3
self.avoid_mmap = False
self.avoid_mmap = True
##########################################################################

# offset and size from data storage message
Expand All @@ -637,8 +637,6 @@ def get_data(self, args=None):
if self.layout_class == 0: # compact storage
raise NotImplementedError("Compact storage")
elif self.layout_class == 1: # contiguous storage
if self.avoid_mmap:
return self._get_selection_from_contiguous(args)
return self._get_contiguous_data(self.property_offset,args)
if self.layout_class == 2: # chunked storage
# If reading all chunks, use the (hopefully faster) "do it one go" method.
Expand Down Expand Up @@ -676,15 +674,18 @@ def _get_contiguous_data(self, property_offset, args):

if data_offset == UNDEFINED_ADDRESS:
# no storage is backing array, return all zeros
result = np.zeros(self.shape, dtype=self.dtype)
return np.zeros(self.shape, dtype=self.dtype)[args]

if not isinstance(self.dtype, tuple):
try:
# return a memory-map to the stored array with copy-on-write
result = np.memmap(self.fh, dtype=self.dtype, mode='c',
offset=data_offset, shape=self.shape, order=self.order)
except UnsupportedOperation:
if self.avoid_mmap:
return self._get_selection_from_contiguous(args)
else:
try:
# return a memory-map to the stored array with copy-on-write
return np.memmap(self.fh, dtype=self.dtype, mode='c',
offset=data_offset, shape=self.shape, order=self.order)[args]
except UnsupportedOperation:
return self._get_selection_from_contiguous(args)
else:
dtype_class = self.dtype[0]
if dtype_class == 'REFERENCE':
Expand All @@ -694,13 +695,9 @@ def _get_contiguous_data(self, property_offset, args):
ref_addresses = np.memmap(
self.fh, dtype=('<u8'), mode='c', offset=data_offset,
shape=self.shape, order=self.order)
return np.array([Reference(addr) for addr in ref_addresses])
return np.array([Reference(addr) for addr in ref_addresses])[args]
else:
raise NotImplementedError('datatype not implemented')
if args is None:
return result
else:
return result[args]

def _get_chunked_data(self, offset):
""" Return data which is chunked. """
Expand Down

0 comments on commit 67c93e0

Please sign in to comment.