Skip to content

Commit df0de1e

Browse files
committed
Another try; now when pickling we specify a directory wrt we store all absolute filepaths
1 parent dc8e3ab commit df0de1e

File tree

20 files changed

+200
-95
lines changed

20 files changed

+200
-95
lines changed

src/koopmans/calculators/_calculator.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ def absolute_directory(self) -> Path:
164164
raise AttributeError(f'Expected `{obj.__class__.__name__}` instance to have a `base_directory` attribute')
165165
return obj.base_directory / path
166166

167+
@property
168+
def base_directory(self) -> Path:
169+
if self.parent is not None:
170+
return self.parent.base_directory
171+
else:
172+
raise ValueError(
173+
f'{self.__class__.__name__} does not have a parent, so `base_directory` cannot be accessed')
174+
167175
def calculate(self):
168176
"""Generic function for running a calculator"""
169177

src/koopmans/files.py

Lines changed: 67 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -72,43 +72,87 @@ def __eq__(self, other):
7272

7373
def __reduce__(self):
7474
# We don't want to store the entire parent object in the database; we only need the directory information
75-
abs_dir = self.parent.absolute_directory
76-
dummy_parent = ParentPlaceholder(directory=self.parent.directory,
77-
absolute_directory=self.parent.absolute_directory)
75+
dummy_parent = ParentPlaceholder.fromobj(self.parent)
7876
return (FilePointer, (dummy_parent, self.name))
7977

8078

8179
class ParentPlaceholder:
82-
# Move into the test suite?
83-
def __init__(self, directory, absolute_directory):
80+
# Placeholder parent for FilePointers that don't have a Workflow/Process/Calculator as a parent
81+
def __init__(self, parent, directory, base_directory=None):
82+
self.parent = parent
8483
self.directory = directory
85-
self._absolute_directory = absolute_directory
84+
if self.parent is None:
85+
self.base_directory = base_directory
86+
else:
87+
self._base_directory = None
8688

87-
@property
88-
def absolute_directory(self):
89-
# This is a property in order to follow the HasDirectoryInfo protocol
90-
return Path(__file__).parents[2] / self._absolute_directory
89+
def __repr__(self):
90+
return f'ParentPlaceholder(directory={self.absolute_directory})'
9191

92+
@classmethod
93+
def fromobj(cls, obj, replace_parents_with_placeholders=False):
94+
if replace_parents_with_placeholders:
95+
if obj.parent is None:
96+
parent = None
97+
else:
98+
parent = cls.fromobj(obj, replace_parents_with_placeholders=True)
99+
else:
100+
parent = obj.parent
92101

93-
class AbsolutePath:
94-
""" A class that can stand in as a parent in a FilePointer when the file is unattached to a Calculator or Process """
102+
if parent is None:
103+
base_directory = obj.base_directory
104+
else:
105+
base_directory = None
106+
directory = obj.directory
107+
new_obj = cls(parent, directory, base_directory)
95108

96-
def __init__(self, directory: Path | str | None):
97-
directory = Path(directory) if isinstance(directory, str) else directory
98-
self.directory: Path | None = directory
109+
# Sanity checks
110+
assert new_obj.directory == obj.directory
111+
assert new_obj.absolute_directory == obj.absolute_directory
112+
assert new_obj.base_directory == obj.base_directory
99113

100-
def __eq__(self, other):
101-
if not isinstance(other, AbsolutePath):
102-
return False
103-
return self.directory == other.directory
114+
return new_obj
115+
116+
@classmethod
117+
def frompath(cls, path: Path):
118+
return cls(None, Path(), path)
104119

105120
@property
106-
def absolute_directory(self) -> Path | None:
107-
return self.directory
121+
def base_directory(self) -> Path:
122+
if self.parent is not None:
123+
return self.parent.base_directory
124+
else:
125+
if self._base_directory is None:
126+
raise ValueError(f'{self.__class__.__name__}.base_directory has not been set')
127+
return self._base_directory
128+
129+
@base_directory.setter
130+
def base_directory(self, value: Path):
131+
if self.parent is not None:
132+
raise ValueError(f'{self.__class__.__name__}.base_directory should not be set for processes with parents')
133+
self._base_directory = value.resolve()
134+
135+
@property
136+
def absolute_directory(self) -> Path:
137+
assert self.directory is not None
138+
if self.parent is None:
139+
abs_dir = Path(self.base_directory / self.directory)
140+
assert abs_dir.is_absolute()
141+
return abs_dir
142+
path = self.parent.directory / self.directory
143+
144+
# Recursive through the parents, adding their directories to path (these are all relative paths)...
145+
obj = self.parent
146+
while getattr(obj, 'parent', None):
147+
assert obj.parent is not None
148+
path = obj.parent.directory / path
149+
obj = obj.parent
150+
151+
# Finally, 'path' is relative to self.base_directory
152+
return self.base_directory / path
108153

109154

110155
def AbsoluteFilePointer(path: Path | str) -> FilePointer:
111156
path = path if isinstance(path, Path) else Path(path)
112-
path = path.resolve()
113-
parent = AbsolutePath(directory=path.parent)
157+
parent = ParentPlaceholder.frompath(path.parent)
114158
return FilePointer(parent=parent, name=Path(path.name))

src/koopmans/io/_dill.py

Lines changed: 46 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,68 @@
11
import os
22
from pathlib import Path
3-
from typing import Any
3+
from typing import Any, Optional
44

55
import dill
66

7+
BASE_PLACEHOLDER = '/BASE_DIRECTORY_PLACEHOLDER/'
78

8-
class PicklerWithRelativePaths(dill.Pickler):
9-
# A pickler that converts absolute paths to relative paths, used during the test suite in combination
10-
# with a monkeypatching of the Path.__eq__ method
9+
10+
class CustomPicklerUsePlaceholder(dill.Pickler):
1111
def __init__(self, base_directory, *args, **kwargs):
1212
super().__init__(*args, **kwargs)
1313
self.base_directory = base_directory
1414

15-
def reducer_override(self, obj):
16-
if isinstance(obj, Path):
17-
return Path, pickle_path(obj, self.base_directory)
18-
else:
19-
return NotImplemented
15+
def persistent_id(self, obj):
16+
"""Called when pickling objects. If obj is a Path, return a unique identifier."""
17+
if isinstance(obj, Path) and obj.is_absolute():
18+
if self.base_directory == obj:
19+
return ('Path', BASE_PLACEHOLDER)
20+
# Return a tuple to identify this as a Path that needs special handling
21+
tup = ('Path', str((Path(BASE_PLACEHOLDER) / os.path.relpath(obj, self.base_directory))))
22+
if 'data' in tup[1]:
23+
assert '..' in tup[1]
24+
return tup
25+
return None # No special handling required for other objects
26+
27+
28+
class CustomUnpicklerReplacePlaceholder(dill.Unpickler):
29+
def __init__(self, base_directory, *args, **kwargs):
30+
super().__init__(*args, **kwargs)
31+
self.base_directory = base_directory.resolve()
32+
33+
def persistent_load(self, pid):
34+
"""Called when unpickling to resolve persistent ids."""
35+
if isinstance(pid, tuple) and pid[0] == 'Path' and pid[1].startswith(BASE_PLACEHOLDER):
36+
# Replace the placeholder with the actual current working directory
37+
path = (self.base_directory / pid[1][len(BASE_PLACEHOLDER):]).resolve()
38+
if 'data' in pid[1]:
39+
assert 'tmp' not in str(path)
40+
return path
41+
return pid # If it's not a special persistent id, return as is
2042

2143

22-
def pickle_path(path: Path, base_directory: Path):
23-
# Convert the path to relative if it's absolute
24-
base_directory = base_directory.resolve()
25-
if path.is_absolute():
26-
rel_path = os.path.relpath(path, base_directory)
27-
return (str(rel_path),)
28-
else:
29-
# Return relative paths as-is
30-
return (str(path),)
44+
class CustomUnpicklerKeepPlaceholder(dill.Unpickler):
45+
def persistent_load(self, pid):
46+
if isinstance(pid, tuple) and pid[0] == 'Path':
47+
return Path(pid[1])
3148

3249

33-
def read_pkl(filename: Path | str) -> Any:
50+
def read_pkl(filename: Path | str, base_directory: Optional[Path] = None) -> Any:
3451
with open(filename, 'rb') as fd:
35-
out = dill.load(fd)
52+
if base_directory is None:
53+
unpickler = CustomUnpicklerKeepPlaceholder(fd)
54+
else:
55+
unpickler = CustomUnpicklerReplacePlaceholder(base_directory, fd)
56+
out = unpickler.load()
57+
3658
return out
3759

3860

3961
def write_pkl(obj: Any, filename: Path | str, base_directory: Path | None = None):
4062
filename = Path(filename) if not isinstance(filename, Path) else filename
41-
if base_directory is None:
42-
base_directory = filename.parent
4363
with open(filename, 'wb') as fd:
44-
pickler = PicklerWithRelativePaths(base_directory, fd)
64+
if base_directory is None:
65+
pickler = dill.Pickler(fd)
66+
else:
67+
pickler = CustomPicklerUsePlaceholder(base_directory, fd)
4568
pickler.dump(obj)

src/koopmans/processes/_process.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,9 @@ def base_directory(self, value: Path):
140140
def absolute_directory(self) -> Path:
141141
assert self.directory is not None
142142
if self.parent is None:
143-
return self.base_directory / self.directory
143+
abs_dir = Path(self.base_directory / self.directory)
144+
assert abs_dir.is_absolute()
145+
return abs_dir
144146
path = self.parent.directory / self.directory
145147

146148
# Recursive through the parents, adding their directories to path (these are all relative paths)...
@@ -150,7 +152,5 @@ def absolute_directory(self) -> Path:
150152
path = obj.parent.directory / path
151153
obj = obj.parent
152154

153-
# ... until we reach the top-level parent, which should have a base_directory attribute (an absolute path)
154-
if not hasattr(obj, 'base_directory'):
155-
raise AttributeError(f'Expected `{obj.__class__.__name__}` instance to have a `base_directory` attribute')
156-
return obj.base_directory / path
155+
# Finally, 'path' is relative to self.base_directory
156+
return self.base_directory / path
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

tests/calculators/test_ph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def test_read_dynG(tio2, tmp_path, datadir, pytestconfig):
2929

3030
if pytestconfig.getoption('generate_benchmark'):
3131
# Write the dielectric tensor to file
32-
write_pkl(eps, benchmark_filename(calc), base_directory=Path(__file__).parents[2])
32+
write_pkl(eps, benchmark_filename(calc))
3333
else:
3434
# Compare with the dielectric tensor on file
3535
eps_ref = read_pkl(benchmark_filename(calc))

tests/calculators/test_projwfc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,5 +38,5 @@ def test_generate_dos(silicon, tmp_path, datadir, pytestconfig):
3838
write_pkl(dos, benchmark_filename(calc))
3939
else:
4040
# Compare with the DOS on file
41-
dos_ref = read_pkl(benchmark_filename(calc), base_directory=Path(__file__).parents[2])
41+
dos_ref = read_pkl(benchmark_filename(calc))
4242
assert dos == dos_ref

tests/conftest.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,7 @@
55
import koopmans
66

77
from .helpers.os import datadir, sys2file
8-
from .helpers.patches import (check_patch, espresso_patch,
9-
patch_path_comparison, tutorial_patch,
8+
from .helpers.patches import (check_patch, espresso_patch, tutorial_patch,
109
workflow_patch)
1110
from .helpers.strategies import ase_cells, bandpaths, kpoints
1211
from .helpers.systems import gaas, ozone, silicon, tio2, water, water_snapshots

tests/helpers/patches/__init__.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,21 +6,6 @@
66
from ._utils import benchmark_filename
77

88

9-
@pytest.fixture(autouse=True)
10-
def patch_path_comparison(monkeypatch):
11-
def path_eq(self, other):
12-
if not isinstance(other, type(self)):
13-
return False
14-
if self.is_absolute() == other.is_absolute():
15-
return str(self) == str(other)
16-
else:
17-
# In some cases we store paths relative to a base directory. Without knowing
18-
# the base directory, the best we can do is check that the relative path
19-
# ends the same as the absolute path
20-
return str(self.resolve()).endswith(str(other)) or str(other).endswith(str(self))
21-
monkeypatch.setattr('pathlib.Path.__eq__', path_eq)
22-
23-
249
def monkeypatch_stumble(monkeypatch):
2510
from ._stumble import (StumblingConvergenceWorkflow,
2611
StumblingDeltaSCFWorkflow, StumblingDFTCPWorkflow,

tests/helpers/patches/_benchmark.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def _calculate(self):
4545

4646
self.parent, parent = None, self.parent
4747
self.linked_files, linked_files = [], self.linked_files
48-
write_pkl(self, filename, base_directory=base_directory)
48+
write_pkl(self, filename, self.base_directory)
4949
self.parent = parent
5050
self.linked_files = linked_files
5151

@@ -91,29 +91,15 @@ def _run(self):
9191
filename.parent.mkdir(parents=True)
9292
# Temporarily wipe the parent attribute so that the entire workflow doesn't get pickled
9393
self.parent, parent = None, self.parent
94-
write_pkl(self, filename, base_directory=base_directory)
94+
write_pkl(self, filename, self.base_directory)
9595
self.parent = parent
9696

9797
# Copy over all files that are outputs of the process that need to be read
9898
for filepath in recursively_find_files([o for _, o in self.outputs]):
9999
if filepath.name in ['power_spectrum.npy']:
100100
shutil.copy(filepath, benchmark_filename(self).parent / filepath.name)
101101

102-
# Patching the absolute_directory property
103-
unpatched_absolute_directory = p.absolute_directory
104-
105-
def absolute_directory(self) -> Path:
106-
if self.parent is None:
107-
# Because we wipe parents when storing benchmarks (see above), this prevents us from being able to construct
108-
# an absolute directory to locate files. Usually, this would raise an error. For the purposes of the test suite,
109-
# instead simply use the base directory of the repo
110-
return Path().resolve().relative_to(base_directory)
111-
else:
112-
# Default behavior
113-
return unpatched_absolute_directory.__get__(self)
114-
115102
monkeypatch.setattr(p, '_run', _run)
116-
monkeypatch.setattr(p, 'absolute_directory', property(absolute_directory))
117103

118104

119105
def monkeypatch_bench(monkeypatch):

tests/helpers/patches/_check.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ def patch_process(p, monkeypatch):
341341

342342
def _run(self):
343343
# Load the benchmark
344-
bench_process = read_pkl(benchmark_filename(self))
344+
bench_process = read_pkl(benchmark_filename(self), base_directory=self.base_directory)
345345

346346
# Compare the inputs
347347
assert bench_process.inputs == self.inputs

tests/helpers/patches/_mock.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def mock_calculator__calculate(self):
3131
with chdir(self.directory):
3232
# By moving into the directory where the calculation was run, we ensure when we read in the settings that
3333
# paths are interpreted relative to this particular working directory
34-
calc = read_pkl(benchmark_filename(self))
34+
calc = read_pkl(benchmark_filename(self), base_directory=self.directory)
3535

3636
# Compare the settings
3737
for key in set(list(self.parameters.keys()) + list(calc.parameters.keys())):
@@ -129,7 +129,7 @@ def generate_dos(self):
129129

130130
def mock_process_run(self):
131131
# Load the inputs from file
132-
bench_process = read_pkl(benchmark_filename(self))
132+
bench_process = read_pkl(benchmark_filename(self), base_directory=self.base_directory)
133133
assert self.inputs == bench_process.inputs
134134
self.outputs = bench_process.outputs
135135

tests/helpers/patches/_utils.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,12 @@ def benchmark_filename(obj) -> Path:
2323
tests_dir = base_directory / 'tests'
2424
name = getattr(obj, 'prefix', getattr(obj, 'name', None))
2525
assert isinstance(name, str)
26-
if tests_dir / 'tmp' in obj.absolute_directory.parents:
27-
benchmark_name = obj.absolute_directory.relative_to(tests_dir / 'tmp') / name
26+
abs_dir = obj.absolute_directory
27+
if tests_dir / 'tmp' in abs_dir.parents:
28+
try:
29+
benchmark_name = abs_dir.relative_to(tests_dir / 'tmp') / name
30+
except:
31+
raise ValueError()
2832
else:
2933
# This is a tutorial test, which we run in the tutorials directory. Because of this, we don't have access
3034
# to the tmp_path automatically generated by pytest, so we need to construct something similar

tests/io/test_dill.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from pathlib import Path
2+
3+
import pytest
4+
5+
from koopmans.io import read_pkl, write_pkl
6+
from koopmans.utils import chdir
7+
8+
base_directory = Path('/home/user/base_directory')
9+
paths = [base_directory / 'subpath/to/directory',
10+
Path('/home/user/path/that/is/not/a/subpath/of/base_directory/'), Path('relative/path')]
11+
12+
13+
@pytest.mark.parametrize('obj', paths)
14+
def test_roundtrip_pkl_with_placeholder(obj, tmp_path):
15+
with chdir(tmp_path):
16+
write_pkl(obj, 'test.pkl', base_directory=base_directory)
17+
18+
new_obj = read_pkl('test.pkl', base_directory=base_directory)
19+
20+
assert obj == new_obj
21+
22+
23+
@pytest.mark.parametrize('obj', paths)
24+
def test_roundtrip_pkl_without_placeholder(obj, tmp_path):
25+
with chdir(tmp_path):
26+
write_pkl(obj, 'test.pkl')
27+
28+
new_obj = read_pkl('test.pkl')
29+
30+
assert obj == new_obj

0 commit comments

Comments
 (0)