Skip to content

Commit

Permalink
Create test for repickling and switch to highest protocol
Browse files Browse the repository at this point in the history
  • Loading branch information
DaanVanVugt committed Mar 2, 2020
1 parent 2e69f33 commit 27ca806
Show file tree
Hide file tree
Showing 6 changed files with 166 additions and 12 deletions.
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,24 @@ multiple processes needs MPI support.
```python
import pickle, h5pickle
f = h5pickle.File('filename.h5', 'r', skip_cache=False) # skip_cache = True by default
f2 = pickle.loads(pickle.dumps(f))
f2 = pickle.loads(pickle.dumps(f, protocol=pickle.HIGHEST_PROTOCOL))
f2 == f # True

g = pickle.loads(pickle.dumps(f['/group/'])) # works
d = pickle.loads(pickle.dumps(f['/group/set'])) # works
g = pickle.loads(pickle.dumps(f['/group/'], protocol=pickle.HIGHEST_PROTOCOL)) # works
d = pickle.loads(pickle.dumps(f['/group/set'], protocol=pickle.HIGHEST_PROTOCOL)) # works
```

_Be very careful using this with any file open flags other than 'r' in a parallel context_

It is recommended to use at least pickle protocol 2. Only some features are known to work with lower protocols.

## Testing
A few tests are available in the `tests/` folder. Run them with:
```bash
pytest
```


## References
Inspired by

Expand Down
7 changes: 6 additions & 1 deletion h5pickle/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ def __getstate__(self):
def __setstate__(self, state):
    """Rebuild this object from the state dict stored in its pickle.

    ``state`` carries a 'file' entry (the owning file, which pickle has
    already restored by the time this method runs) and a 'name' entry (the
    key of this object inside that file). The object is looked up again
    under that name and its ``id`` is handed to ``__init__``, so this
    instance takes over the identity of the freshly looked-up object.
    """
    owner = state['file']
    self.__init__(owner[state['name']].id)
    # Keep the owning file on the instance; presumably __getstate__ reads it
    # back so that an unpickled object can be pickled again -- confirm there.
    self.file_info = owner

def __getnewargs__(self):
    """Override the h5py getnewargs to skip its error message.

    Returns an empty tuple, i.e. no positional arguments for ``__new__``
    when pickle (protocol 2 and above) recreates the object.
    """
    return ()

class Dataset(PickleAbleH5PyObject, h5py.Dataset):
"""Mix in our pickling class"""
Expand All @@ -91,7 +96,7 @@ def arghash(*args, **kwargs):
class File(h5py.File):
"""A subclass of h5py.File that implements a memoized cache and pickling.
Use this if you are going to be creating h5py.Files of the same file often.
Pickling is done not with __{get,set}state__ but with __getnewargs_ex__
which produces the arguments to supply to the __new__ method. This is required
to allow for memoization of unpickled values.
Expand Down
30 changes: 30 additions & 0 deletions tests/test_pickling_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""
Test whether pickling hdf5 datasets works in single_process mode
"""

import unittest
import tempfile
import pickle
import os
import h5py
import h5pickle

class PicklingTest(unittest.TestCase):
    """Check that an h5pickle dataset survives a pickle round trip."""

    def setUp(self):
        """Create a fresh temporary HDF5 file holding a scalar dataset 'a' = 1."""
        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Close it right away so it does not leak: the file is only
        # ever accessed by path from here on.
        fd, self.file = tempfile.mkstemp(suffix='.h5')
        os.close(fd)
        with h5py.File(self.file, 'w') as f:
            f['a'] = 1

    def tearDown(self):
        """Delete the temporary file so test runs leave nothing behind."""
        os.remove(self.file)

    def test_readonly_skip_cache(self):
        """A dataset from a read-only, uncached file can be pickled and read back."""
        opened = []
        try:
            f = h5pickle.File(self.file, 'r', skip_cache=True)
            opened.append(f)
            self.assertEqual(f['a'][()], 1, 'can read from file')

            g = pickle.loads(pickle.dumps(f['a']))
            opened.append(g.file)

            self.assertEqual(g[()], 1, 'reading from dataset should give correct result')
        finally:
            # Close every handle even when an assertion fails, so that
            # tearDown can always remove the file.
            for handle in opened:
                handle.close()

# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
80 changes: 80 additions & 0 deletions tests/test_pickling_protocols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
Test whether pickling hdf5 files and datasets works with each pickle protocol (0-3)
"""

import unittest
import tempfile
import pickle
import os
import h5py
import h5pickle

class PicklingTest(unittest.TestCase):
    """Round-trip an h5pickle file and one of its datasets through pickle protocols 0-3.

    Protocols 0 and 1 are marked as expected failures; protocols 2 and 3
    must succeed.
    """

    def setUp(self):
        """Create a fresh temporary HDF5 file holding a scalar dataset 'a' = 1."""
        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Close it right away so it does not leak: the file is only
        # ever accessed by path from here on.
        fd, self.file = tempfile.mkstemp(suffix='.h5')
        os.close(fd)
        with h5py.File(self.file, 'w') as f:
            f['a'] = 1

    def tearDown(self):
        """Delete the temporary file created in setUp."""
        os.remove(self.file)

    def _check_protocol(self, protocol):
        """Pickle and unpickle both the file and its dataset with ``protocol``.

        Any exception (including assertion failures) propagates to the
        calling test; all handles opened so far are closed first.
        """
        opened = []
        try:
            f = h5pickle.File(self.file, 'r', skip_cache=True)
            opened.append(f)

            h = pickle.loads(pickle.dumps(f, protocol=protocol))
            opened.append(h)
            # Read through the unpickled handle `h`: reading through `f`
            # would pass even if unpickling produced an unusable file.
            self.assertEqual(h['a'][()], 1, 'reading from file dataset should give correct result')

            g = pickle.loads(pickle.dumps(f['a'], protocol=protocol))
            opened.append(g.file)
            self.assertEqual(g[()], 1, 'reading from dataset should give correct result')
        finally:
            # Close every handle even on failure, so that tearDown can
            # always remove the file.
            for handle in opened:
                handle.close()

    @unittest.expectedFailure
    def test_0(self):
        """Protocol 0 is expected to fail."""
        self._check_protocol(0)

    @unittest.expectedFailure
    def test_1(self):
        """Protocol 1 is expected to fail."""
        self._check_protocol(1)

    def test_2(self):
        """Protocol 2 must round-trip both the file and the dataset."""
        self._check_protocol(2)

    def test_3(self):
        """Protocol 3 must round-trip both the file and the dataset."""
        self._check_protocol(3)

# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
17 changes: 9 additions & 8 deletions tests/test_pickling_single_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
"""

import unittest
import h5pickle
import h5py
import tempfile
import pickle
import os
import h5py
import h5pickle

class PicklingTest(unittest.TestCase):
file = 'test.h5'
file = tempfile.mkstemp(suffix='.h5')[1]
def setUp(self):
with h5py.File(self.file, 'w') as f:
f['a'] = 1
Expand All @@ -18,7 +19,7 @@ def test_readonly_skip_cache(self):
f = h5pickle.File(self.file, 'a', skip_cache=True)
self.assertEqual(f['a'][()], 1, 'can read from file')

g = pickle.loads(pickle.dumps(f))
g = pickle.loads(pickle.dumps(f, protocol=pickle.HIGHEST_PROTOCOL))

self.assertEqual(g['a'][()], 1, 'reading from dataset should give correct result')
# Since cache is skipped I want 2 different handles
Expand All @@ -31,7 +32,7 @@ def test_readonly_skip_cache(self):
def test_readonly(self):
f = h5pickle.File(self.file, 'a', skip_cache=False)

g = pickle.loads(pickle.dumps(f))
g = pickle.loads(pickle.dumps(f, protocol=pickle.HIGHEST_PROTOCOL))

# Due to the caching mechanism f and g should be the same
self.assertEqual(id(f), id(g), 'pickling and unpickling should use cache')
Expand All @@ -48,13 +49,13 @@ def tearDown(self):


class PicklingWritableTest(unittest.TestCase):
file = 'test.h5'
file = tempfile.mkstemp(suffix='.h5')[1]
def test_create_writable_file(self):
f = h5pickle.File(self.file, 'w', skip_cache=True)

got_oserror = False
try:
g = pickle.loads(pickle.dumps(f))
g = pickle.loads(pickle.dumps(f, protocol=pickle.HIGHEST_PROTOCOL))
# We expect an error here, since we cannot open on multiple processes
except OSError:
got_oserror = True
Expand Down
29 changes: 29 additions & 0 deletions tests/test_repickling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Test whether re-pickling hdf5 works
"""

import unittest
import tempfile
import pickle
import os
import h5py
import h5pickle

class RePicklingTest(unittest.TestCase):
    """Check that a dataset restored from a pickle can itself be pickled again."""

    def setUp(self):
        """Create a fresh temporary HDF5 file holding a scalar dataset 'a' = 1."""
        # mkstemp returns an already-open OS-level descriptor together with
        # the path. Close it right away so it does not leak: the file is only
        # ever accessed by path from here on.
        fd, self.file = tempfile.mkstemp(suffix='.h5')
        os.close(fd)
        with h5py.File(self.file, 'w') as f:
            f['a'] = 1

    def tearDown(self):
        """Delete the temporary file created in setUp."""
        os.remove(self.file)

    def test_repickling(self):
        """Pickle a dataset, unpickle it, pickle the result again and read it back."""
        opened = []
        try:
            f = h5pickle.File(self.file, 'r', skip_cache=True)
            opened.append(f)
            dataset = f['a']
            dataset_pickled = pickle.dumps(dataset, protocol=pickle.HIGHEST_PROTOCOL)
            dataset_unpickled = pickle.loads(dataset_pickled)
            opened.append(dataset_unpickled.file)

            # Pickling the already-unpickled dataset is the behaviour under test.
            dataset_repickled = pickle.dumps(dataset_unpickled, protocol=pickle.HIGHEST_PROTOCOL)

            # Make sure the second pickle is usable, not merely producible.
            dataset_restored = pickle.loads(dataset_repickled)
            opened.append(dataset_restored.file)
            self.assertEqual(dataset_restored[()], 1,
                             'reading from re-pickled dataset should give correct result')
        finally:
            # Close every handle before tearDown removes the file.
            for handle in opened:
                handle.close()

# Run this module's tests when the file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit 27ca806

Please sign in to comment.