From 44f58059f0237af80d802e22668b9df860614cc5 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Thu, 16 May 2024 11:07:01 +0200 Subject: [PATCH 01/11] save units in attrs of H5 --- znh5md/format/__init__.py | 17 +++++++++++++++++ znh5md/io/base.py | 23 ++++++++++++++++++----- znh5md/io/reader.py | 6 +++++- 3 files changed, 40 insertions(+), 6 deletions(-) diff --git a/znh5md/format/__init__.py b/znh5md/format/__init__.py index 509bb6a..cac2f58 100644 --- a/znh5md/format/__init__.py +++ b/znh5md/format/__init__.py @@ -66,6 +66,23 @@ def decode_boundary(value) -> np.ndarray: OBSERVABLES_GRP = [GRP.energy, GRP.stress] +@dataclasses.dataclass +class StepTimeChunkUnits: + value: str + time: str + + +# We store everything in ASE units +UNITS_PER_GRP = { + GRP.position: StepTimeChunkUnits(value="Angstrom", time="fs"), + GRP.velocity: StepTimeChunkUnits(value="Angstrom/fs", time="fs"), + GRP.forces: StepTimeChunkUnits(value="eV/Angstrom", time="fs"), + GRP.energy: StepTimeChunkUnits(value="eV", time="fs"), + GRP.stress: StepTimeChunkUnits(value="GPa", time="fs"), + GRP.momentum: StepTimeChunkUnits(value="eV/fs", time="fs"), + GRP.edges: StepTimeChunkUnits(value="Angstrom", time="fs"), + GRP.boundary: StepTimeChunkUnits(value="Angstrom", time="fs"), +} @dataclasses.dataclass class FormatHandler: diff --git a/znh5md/io/base.py b/znh5md/io/base.py index 66a9d20..2616e7d 100644 --- a/znh5md/io/base.py +++ b/znh5md/io/base.py @@ -32,6 +32,9 @@ class StepTimeChunk: value: np.ndarray step: np.ndarray time: np.ndarray + + value_units: str = None + time_units: str = None @property def shape(self) -> tuple: @@ -83,16 +86,21 @@ class ExplicitStepTimeChunk(StepTimeChunk): def create_dataset(self, dataset_group: h5py.Group): """Create the datasets for the chunk.""" - dataset_group.create_dataset( + value_ds = dataset_group.create_dataset( "value", maxshape=self.shape, data=self.value, chunks=True ) - dataset_group.create_dataset( + time_ds = dataset_group.create_dataset( "time", maxshape=(None,), data=self.time, chunks=True ) dataset_group.create_dataset( "step", maxshape=(None,), data=self.step, chunks=True ) + if self.value_units is not None: + value_ds.attrs["unit"] = self.value_units + if self.time_units is not None: + time_ds.attrs["unit"] = self.time_units + def append_to_dataset(self, dataset_group: h5py.Group): n_current_frames = dataset_group["value"].shape[0] @@ -113,12 +121,17 @@ class FixedStepTimeChunk(StepTimeChunk): def create_dataset(self, dataset_group: h5py.Group): """Create the datasets for the chunk.""" - dataset_group.create_dataset( + value_ds = dataset_group.create_dataset( "value", maxshape=self.shape, data=self.value, chunks=True ) - dataset_group.create_dataset("time", data=self.time) + time_ds = dataset_group.create_dataset("time", data=self.time) dataset_group.create_dataset("step", data=self.step) + if self.value_units is not None: + value_ds.attrs["unit"] = self.value_units + if self.time_units is not None: + time_ds.attrs["unit"] = self.time_units + def append_to_dataset(self, dataset_group: h5py.Group): n_current_frames = dataset_group["value"].shape[0] @@ -208,7 +221,7 @@ def create_group(self, db_path, group_name, chunk_data): dataset_group = db_path.create_group(group_name) chunk_data.create_dataset(dataset_group) - def add_data_to_group(self, db_path, group_name, chunk_data): + def add_data_to_group(self, db_path, group_name, chunk_data: StepTimeChunk): """Add data to an existing group. For each group in kwargs, the following datasets are resized and appended to: diff --git a/znh5md/io/reader.py b/znh5md/io/reader.py index 1fb25fb..34c15cc 100644 --- a/znh5md/io/reader.py +++ b/znh5md/io/reader.py @@ -8,7 +8,7 @@ import tqdm from ase.calculators.calculator import PropertyNotImplementedError -from znh5md.format import GRP +from znh5md.format import GRP, UNITS_PER_GRP from znh5md.io.base import DataReader, FixedStepTimeChunk log = logging.getLogger(__name__) @@ -127,6 +127,8 @@ def yield_chunks( value=value, step=self.step, time=self.time, + time_units=UNITS_PER_GRP[name].time if name in UNITS_PER_GRP else None, + value_units=UNITS_PER_GRP[name].value if name in UNITS_PER_GRP else None, ) except (PropertyNotImplementedError, RuntimeError, KeyError) as err: if group_names is not None: @@ -149,6 +151,8 @@ def yield_chunks( value=value, step=self.step, time=self.time, + time_units=UNITS_PER_GRP[GRP.energy].time if GRP.energy in UNITS_PER_GRP else None, + value_units=UNITS_PER_GRP[GRP.energy].value if GRP.energy in UNITS_PER_GRP else None, ) yield data start_index = stop_index From 479e04cef50012b29f1ac3f1a2f1aff82a21b57c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 16 May 2024 09:07:24 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- znh5md/format/__init__.py | 2 ++ znh5md/io/base.py | 2 +- znh5md/io/reader.py | 20 ++++++++++++++++---- 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/znh5md/format/__init__.py b/znh5md/format/__init__.py index cac2f58..da26e7a 100644 --- a/znh5md/format/__init__.py +++ b/znh5md/format/__init__.py @@ -66,6 +66,7 @@ def decode_boundary(value) -> np.ndarray: OBSERVABLES_GRP = [GRP.energy, GRP.stress] + @dataclasses.dataclass class StepTimeChunkUnits: value: str @@ -84,6 +85,7 @@ class StepTimeChunkUnits: GRP.boundary: StepTimeChunkUnits(value="Angstrom", time="fs"), } + @dataclasses.dataclass class FormatHandler: filename: str diff --git a/znh5md/io/base.py b/znh5md/io/base.py index 2616e7d..34ec495 100644 --- a/znh5md/io/base.py +++ b/znh5md/io/base.py @@ -32,7 +32,7 @@ class StepTimeChunk: value: np.ndarray step: np.ndarray time: np.ndarray - + value_units: str = None time_units: str = None diff --git a/znh5md/io/reader.py b/znh5md/io/reader.py index 34c15cc..e88ab7f 100644 --- a/znh5md/io/reader.py +++ b/znh5md/io/reader.py @@ -127,8 +127,12 @@ def yield_chunks( value=value, step=self.step, time=self.time, - time_units=UNITS_PER_GRP[name].time if name in UNITS_PER_GRP else None, - value_units=UNITS_PER_GRP[name].value if name in UNITS_PER_GRP else None, + time_units=( + UNITS_PER_GRP[name].time if name in UNITS_PER_GRP else None + ), + value_units=( + UNITS_PER_GRP[name].value if name in UNITS_PER_GRP else None + ), ) except (PropertyNotImplementedError, RuntimeError, KeyError) as err: if group_names is not None: @@ -151,8 +155,16 @@ def yield_chunks( value=value, step=self.step, time=self.time, - time_units=UNITS_PER_GRP[GRP.energy].time if GRP.energy in UNITS_PER_GRP else None, - value_units=UNITS_PER_GRP[GRP.energy].value if GRP.energy in UNITS_PER_GRP else None, + time_units=( + UNITS_PER_GRP[GRP.energy].time + if GRP.energy in UNITS_PER_GRP + else None + ), + value_units=( + UNITS_PER_GRP[GRP.energy].value + if GRP.energy in UNITS_PER_GRP + else None + ), ) yield data start_index = stop_index From 32ef54fff4a263b94fcc8f70b30ded8c4a54ad55 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Thu, 16 May 2024 11:22:25 +0200 Subject: [PATCH 03/11] hardcode PBC and 3D data --- znh5md/io/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/znh5md/io/base.py b/znh5md/io/base.py index 2616e7d..0bdaf72 100644 --- a/znh5md/io/base.py +++ b/znh5md/io/base.py @@ -252,6 +252,9 @@ def handle_boundary(self, file, chunk_data): # dimension group is required by H5MD atoms.create_dataset(f"box/{GRP.dimension}", data=len(chunk_data.value)) + atoms["box"].attrs["boundary"] = ["periodic", "periodic", "periodic"] + atoms["box"].attrs["dimension"] = 3 + def add_chunk_data(self, **kwargs: CHUNK_DICT) -> None: """Write Chunks to the database. From 67272b54305529062c8994302a80022f0cad9806 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 10:21:36 +0200 Subject: [PATCH 04/11] only create groups when required --- poetry.lock | 219 ++++++++++++++++++++++++++++++++++++++- pyproject.toml | 1 + tests/test_mdanalysis.py | 38 +++++++ znh5md/io/base.py | 36 +++++-- 4 files changed, 281 insertions(+), 13 deletions(-) create mode 100644 tests/test_mdanalysis.py diff --git a/poetry.lock b/poetry.lock index a0a2b18..ffd138d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -752,6 +752,17 @@ files = [ [package.extras] tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] +[[package]] +name = "fasteners" +version = "0.19" +description = "A python package that provides useful locks" +optional = false +python-versions = ">=3.6" +files = [ + {file = "fasteners-0.19-py3-none-any.whl", hash = "sha256:758819cb5d94cdedf4e836988b74de396ceacb8e2794d21f82d131fd9ee77237"}, + {file = "fasteners-0.19.tar.gz", hash = "sha256:b4f37c3ac52d8a445af3a66bce57b33b5e90b97c696b7b984f530cf8f0ded09c"}, +] + [[package]] name = "fastjsonschema" version = "2.19.1" @@ -923,6 +934,22 @@ dev = ["flake8", "pep8-naming", "tox (>=3)", "twine", "wheel"] docs = ["sphinx (>=5,<7)", "sphinx-autodoc-typehints", "sphinx-rtd-theme"] test = ["coverage", "pytest (>=7,<8.1)", "pytest-cov", "pytest-mock (>=3)"] +[[package]] +name = "griddataformats" +version = "1.0.2" +description = "Reading and writing of data on regular grids in Python" +optional = false +python-versions = ">=3.9" +files = [ + {file = "GridDataFormats-1.0.2-py3-none-any.whl", hash = "sha256:4562bb2da9a064a8e6640e5fb853e204c8f32f6403651703f6b0ef58a009054c"}, + {file = "GridDataFormats-1.0.2.tar.gz", hash = "sha256:b93cf7f36fce33dbc428026f26dba560d5c7ba2387caca495bad920f90094502"}, +] + +[package.dependencies] +mrcfile = "*" +numpy = ">=1.21" +scipy = "*" + [[package]] name = "h11" version = "0.14.0" @@ -1205,6 +1232,17 @@ MarkupSafe = ">=2.0" [package.extras] i18n = ["Babel (>=2.7)"] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + [[package]] name = "json5" version = "0.9.25" @@ -1763,6 +1801,68 @@ files = [ [package.dependencies] traitlets = "*" +[[package]] +name = "mda-xdrlib" +version = "0.2.0" +description = "Stand-alone XDRLIB module (from cpython 3.10.8)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mda_xdrlib-0.2.0-py3-none-any.whl", hash = "sha256:0d1757b339f5db2d017a89ddaae06a82cd7a2cce26b8063df9f52b64e933fb47"}, + {file = "mda_xdrlib-0.2.0.tar.gz", hash = "sha256:f26f7158a83c32b96d15b530fce2cbc1190c4b7024e41faa4ab3e3db74e272af"}, +] + +[package.extras] +testing = ["pytest"] + +[[package]] +name = "mdanalysis" +version = "2.7.0" +description = "An object-oriented toolkit to analyze molecular dynamics trajectories." +optional = false +python-versions = ">=3.9" +files = [ + {file = "MDAnalysis-2.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:752a7e79f2195e284bd5c6c74510e7a587050aa15a5fd5d1f0b1c173238c7f9e"}, + {file = "MDAnalysis-2.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d9f5506b70614aa21974488c61b150dcf443c2ef50bacba0ff9c5369fdd1dec8"}, + {file = "MDAnalysis-2.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cd8d2d5318f5adaa2b4143c60f5006bb9f5ef294eea87bea5b35127cc8f30399"}, + {file = "MDAnalysis-2.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f6924273c498c7134cfe985f2779ebf2f1d4204161c354ae8175779a20794e9"}, + {file = "MDAnalysis-2.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:348372eacd74a6be0cd7e2af9a5d6f0aae6b86c16868937bd57f4636e4e6141e"}, + {file = "MDAnalysis-2.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:45a6ad90705ff50ddae7f0d7a2b3ddfd0185e3e8314930874b9fc3ac56853ec9"}, + {file = "MDAnalysis-2.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3be89d52e3442d58b65eef89626147d1e319a7912a913c7fccd78ba878418800"}, + {file = "MDAnalysis-2.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9358a88e4558febc4eca58dc69aeaf836b8eacc92556e0205c6c40497907e3ab"}, + {file = "MDAnalysis-2.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1fd4b24cfb5fc63e9ae758251af947702f1a033f9bb38e54e206453c6afd3db0"}, + {file = "MDAnalysis-2.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:eea51334fb3cc45475003e992c63205f5c5ea34920154ab2cea9d44120b5904b"}, + {file = "MDAnalysis-2.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:486794d7e957e959f612e1f664878e0244594864c057bff0748eca6fef7d1523"}, + {file = "MDAnalysis-2.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:aaf7c158a65e032d15ef63cc08a6790d87ec3d7ee6037eb778d1959aec6c305a"}, + {file = "MDAnalysis-2.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5613c102d2ba4ac6ff2527ba9ea1b2564fdd90f3e06e921752bbcad82c6b173e"}, + {file = "MDAnalysis-2.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf975db416007242e5674ff44803ce8e36313c5602f401244858e26e848b9411"}, + {file = "MDAnalysis-2.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:3bcc653057e8ea6f35a32e9993d840ae919842fa0756a966966d6eedc1072ee5"}, + {file = "MDAnalysis-2.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:c7dc2b9601126cdd19e2b39ee38fc359977f5ab949ae416fc2470b110fc2c75a"}, + {file = "MDAnalysis-2.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1bd57db448afd1b73f0ea8954f3f1fdf9033442d18cc9f2ef3f1c59930a24e0b"}, + {file = "MDAnalysis-2.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81dfcd1f519fd288921eda7f97db0d1d287d55645c5cbe96031baf6209bfd2f5"}, + {file = "MDAnalysis-2.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:501a473d65ef6e296f50a8d246a77f31ca41752be1f7edc76f88e468d672a41d"}, + {file = "MDAnalysis-2.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:345043a6365237a55d20f8dc0e31aa1d4e09e348ca8e1877dee6a2a6f6500c0b"}, + {file = "MDAnalysis-2.7.0.tar.gz", hash = "sha256:572e82945e5d058e3749ec5f18e6b3831ef7f2119cb54672567ae9a977201e93"}, +] + +[package.dependencies] +fasteners = "*" +GridDataFormats = ">=0.4.0" +joblib = ">=0.12" +matplotlib = ">=1.5.1" +mda-xdrlib = "*" +mmtf-python = ">=1.0.0" +numpy = ">=1.22.3,<2.0" +packaging = "*" +scipy = ">=1.5.0" +threadpoolctl = "*" +tqdm = ">=4.43.0" + +[package.extras] +analysis = ["biopython (>=1.80)", "networkx (>=2.0)", "scikit-learn", "seaborn", "tidynamics (>=1.0.0)"] +doc = ["mdanalysis-sphinx-theme (>=1.3.0)", "pybtex", "pybtex-docutils", "sphinx", "sphinx-sitemap", "sphinxcontrib-bibtex"] +extra-formats = ["chemfiles (>=0.10)", "gsd (>3.0.0)", "h5py (>=2.10)", "netCDF4 (>=1.0)", "parmed", "pyedr (>=0.7.0)", "pytng (>=0.2.3)", "rdkit (>=2020.03.1)"] + [[package]] name = "mdurl" version = "0.1.2" @@ -1785,6 +1885,104 @@ files = [ {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, ] +[[package]] +name = "mmtf-python" +version = "1.1.3" +description = "A decoding libary for the PDB mmtf format" +optional = false +python-versions = "*" +files = [ + {file = "mmtf-python-1.1.3.tar.gz", hash = "sha256:12a02fe1b7131f0a2b8ce45b46f1e0cdd28b9818fe4499554c26884987ea0c32"}, + {file = "mmtf_python-1.1.3-py2.py3-none-any.whl", hash = "sha256:502031c509a8a6d73e042781abbd88b84c1afffe65097eb0c1b70f329ffd1e6e"}, +] + +[package.dependencies] +msgpack = ">=1.0.0" + +[package.extras] +dev = ["check-manifest"] +test = ["coverage"] + +[[package]] +name = "mrcfile" +version = "1.5.0" +description = "MRC file I/O library" +optional = false +python-versions = "*" +files = [ + {file = "mrcfile-1.5.0-py2.py3-none-any.whl", hash = "sha256:9c3af796b35d84dd2ddc357b8427d2034212ad58de0a8f0201c2a724b9e59ad1"}, + {file = "mrcfile-1.5.0.tar.gz", hash = "sha256:9e8196b8a33aa5b5237b43336cb9d08b6058b8127ea668d63b84d58bca795ec1"}, +] + +[package.dependencies] +numpy = ">=1.16.0" + +[[package]] +name = "msgpack" +version = "1.0.8" +description = "MessagePack serializer" +optional = false +python-versions = ">=3.8" +files = [ + {file = "msgpack-1.0.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:505fe3d03856ac7d215dbe005414bc28505d26f0c128906037e66d98c4e95868"}, + {file = "msgpack-1.0.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b7842518a63a9f17107eb176320960ec095a8ee3b4420b5f688e24bf50c53c"}, + {file = "msgpack-1.0.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:376081f471a2ef24828b83a641a02c575d6103a3ad7fd7dade5486cad10ea659"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5e390971d082dba073c05dbd56322427d3280b7cc8b53484c9377adfbae67dc2"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e073efcba9ea99db5acef3959efa45b52bc67b61b00823d2a1a6944bf45982"}, + {file = "msgpack-1.0.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:82d92c773fbc6942a7a8b520d22c11cfc8fd83bba86116bfcf962c2f5c2ecdaa"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9ee32dcb8e531adae1f1ca568822e9b3a738369b3b686d1477cbc643c4a9c128"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e3aa7e51d738e0ec0afbed661261513b38b3014754c9459508399baf14ae0c9d"}, + {file = "msgpack-1.0.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:69284049d07fce531c17404fcba2bb1df472bc2dcdac642ae71a2d079d950653"}, + {file = "msgpack-1.0.8-cp310-cp310-win32.whl", hash = "sha256:13577ec9e247f8741c84d06b9ece5f654920d8365a4b636ce0e44f15e07ec693"}, + {file = "msgpack-1.0.8-cp310-cp310-win_amd64.whl", hash = "sha256:e532dbd6ddfe13946de050d7474e3f5fb6ec774fbb1a188aaf469b08cf04189a"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9517004e21664f2b5a5fd6333b0731b9cf0817403a941b393d89a2f1dc2bd836"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d16a786905034e7e34098634b184a7d81f91d4c3d246edc6bd7aefb2fd8ea6ad"}, + {file = "msgpack-1.0.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e2872993e209f7ed04d963e4b4fbae72d034844ec66bc4ca403329db2074377b"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c330eace3dd100bdb54b5653b966de7f51c26ec4a7d4e87132d9b4f738220ba"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:83b5c044f3eff2a6534768ccfd50425939e7a8b5cf9a7261c385de1e20dcfc85"}, + {file = "msgpack-1.0.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1876b0b653a808fcd50123b953af170c535027bf1d053b59790eebb0aeb38950"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:dfe1f0f0ed5785c187144c46a292b8c34c1295c01da12e10ccddfc16def4448a"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:3528807cbbb7f315bb81959d5961855e7ba52aa60a3097151cb21956fbc7502b"}, + {file = "msgpack-1.0.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e2f879ab92ce502a1e65fce390eab619774dda6a6ff719718069ac94084098ce"}, + {file = "msgpack-1.0.8-cp311-cp311-win32.whl", hash = "sha256:26ee97a8261e6e35885c2ecd2fd4a6d38252246f94a2aec23665a4e66d066305"}, + {file = "msgpack-1.0.8-cp311-cp311-win_amd64.whl", hash = "sha256:eadb9f826c138e6cf3c49d6f8de88225a3c0ab181a9b4ba792e006e5292d150e"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:114be227f5213ef8b215c22dde19532f5da9652e56e8ce969bf0a26d7c419fee"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d661dc4785affa9d0edfdd1e59ec056a58b3dbb9f196fa43587f3ddac654ac7b"}, + {file = "msgpack-1.0.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d56fd9f1f1cdc8227d7b7918f55091349741904d9520c65f0139a9755952c9e8"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0726c282d188e204281ebd8de31724b7d749adebc086873a59efb8cf7ae27df3"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8db8e423192303ed77cff4dce3a4b88dbfaf43979d280181558af5e2c3c71afc"}, + {file = "msgpack-1.0.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99881222f4a8c2f641f25703963a5cefb076adffd959e0558dc9f803a52d6a58"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b5505774ea2a73a86ea176e8a9a4a7c8bf5d521050f0f6f8426afe798689243f"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:ef254a06bcea461e65ff0373d8a0dd1ed3aa004af48839f002a0c994a6f72d04"}, + {file = "msgpack-1.0.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:e1dd7839443592d00e96db831eddb4111a2a81a46b028f0facd60a09ebbdd543"}, + {file = "msgpack-1.0.8-cp312-cp312-win32.whl", hash = "sha256:64d0fcd436c5683fdd7c907eeae5e2cbb5eb872fafbc03a43609d7941840995c"}, + {file = "msgpack-1.0.8-cp312-cp312-win_amd64.whl", hash = "sha256:74398a4cf19de42e1498368c36eed45d9528f5fd0155241e82c4082b7e16cffd"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0ceea77719d45c839fd73abcb190b8390412a890df2f83fb8cf49b2a4b5c2f40"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1ab0bbcd4d1f7b6991ee7c753655b481c50084294218de69365f8f1970d4c151"}, + {file = "msgpack-1.0.8-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1cce488457370ffd1f953846f82323cb6b2ad2190987cd4d70b2713e17268d24"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3923a1778f7e5ef31865893fdca12a8d7dc03a44b33e2a5f3295416314c09f5d"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a22e47578b30a3e199ab067a4d43d790249b3c0587d9a771921f86250c8435db"}, + {file = "msgpack-1.0.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd739c9251d01e0279ce729e37b39d49a08c0420d3fee7f2a4968c0576678f77"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d3420522057ebab1728b21ad473aa950026d07cb09da41103f8e597dfbfaeb13"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:5845fdf5e5d5b78a49b826fcdc0eb2e2aa7191980e3d2cfd2a30303a74f212e2"}, + {file = "msgpack-1.0.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:6a0e76621f6e1f908ae52860bdcb58e1ca85231a9b0545e64509c931dd34275a"}, + {file = "msgpack-1.0.8-cp38-cp38-win32.whl", hash = "sha256:374a8e88ddab84b9ada695d255679fb99c53513c0a51778796fcf0944d6c789c"}, + {file = "msgpack-1.0.8-cp38-cp38-win_amd64.whl", hash = "sha256:f3709997b228685fe53e8c433e2df9f0cdb5f4542bd5114ed17ac3c0129b0480"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f51bab98d52739c50c56658cc303f190785f9a2cd97b823357e7aeae54c8f68a"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:73ee792784d48aa338bba28063e19a27e8d989344f34aad14ea6e1b9bd83f596"}, + {file = "msgpack-1.0.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f9904e24646570539a8950400602d66d2b2c492b9010ea7e965025cb71d0c86d"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e75753aeda0ddc4c28dce4c32ba2f6ec30b1b02f6c0b14e547841ba5b24f753f"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dbf059fb4b7c240c873c1245ee112505be27497e90f7c6591261c7d3c3a8228"}, + {file = "msgpack-1.0.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4916727e31c28be8beaf11cf117d6f6f188dcc36daae4e851fee88646f5b6b18"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7938111ed1358f536daf311be244f34df7bf3cdedb3ed883787aca97778b28d8"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:493c5c5e44b06d6c9268ce21b302c9ca055c1fd3484c25ba41d34476c76ee746"}, + {file = "msgpack-1.0.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fbb160554e319f7b22ecf530a80a3ff496d38e8e07ae763b9e82fadfe96f273"}, + {file = "msgpack-1.0.8-cp39-cp39-win32.whl", hash = "sha256:f9af38a89b6a5c04b7d18c492c8ccf2aee7048aff1ce8437c4683bb5a1df893d"}, + {file = "msgpack-1.0.8-cp39-cp39-win_amd64.whl", hash = "sha256:ed59dd52075f8fc91da6053b12e8c89e37aa043f8986efd89e61fae69dc1b011"}, + {file = "msgpack-1.0.8-py3-none-any.whl", hash = "sha256:24f727df1e20b9876fa6e95f840a2a2651e34c0ad147676356f4bf5fbb0206ca"}, + {file = "msgpack-1.0.8.tar.gz", hash = "sha256:95c02b0e27e706e48d0e5426d1710ca78e0f0628d6e89d5b5a5b91a5f12274f3"}, +] + [[package]] name = "nbclient" version = "0.10.0" @@ -2917,6 +3115,17 @@ docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] test = ["pre-commit", "pytest (>=7.0)", "pytest-timeout"] typing = ["mypy (>=1.6,<2.0)", "traitlets (>=5.11.1)"] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + [[package]] name = "tinycss2" version = "1.3.0" @@ -3157,18 +3366,18 @@ test = ["websockets"] [[package]] name = "zipp" -version = "3.18.1" +version = "3.18.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" files = [ - {file = "zipp-3.18.1-py3-none-any.whl", hash = "sha256:206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b"}, - {file = "zipp-3.18.1.tar.gz", hash = "sha256:2884ed22e7d8961de1c9a05142eb69a247f120291bc0206a00a7642f09b5b715"}, + {file = "zipp-3.18.2-py3-none-any.whl", hash = "sha256:dce197b859eb796242b0622af1b8beb0a722d52aa2f57133ead08edd5bf5374e"}, + {file = "zipp-3.18.2.tar.gz", hash = "sha256:6278d9ddbcfb1f1089a88fde84481528b07b0e10474e09dcfe53dad4069fa059"}, ] [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] [extras] dask = ["dask"] @@ -3176,4 +3385,4 @@ dask = ["dask"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "9cd1d20a85a131647fa219721b10465ade872f4e236cd8394cf6113e20354f0f" +content-hash = "a368f4ad7e1af7297e4dd3f56136ebda511f4bd819426f642625144cdcc6d01b" diff --git a/pyproject.toml b/pyproject.toml index a50c336..e96a88d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ pre-commit = "^2.20.0" coverage = "^7.1.0" pytest-profiling = "^1.7.0" pytest-benchmark = "^4.0.0" +mdanalysis = "^2" [tool.poetry.group.notebook.dependencies] jupyterlab = "^4" diff --git a/tests/test_mdanalysis.py b/tests/test_mdanalysis.py new file mode 100644 index 0000000..346c1db --- /dev/null +++ b/tests/test_mdanalysis.py @@ -0,0 +1,38 @@ +"""Use the MDAnalysis library to read H5 files and check compliance with the H5MD standard.""" +import MDAnalysis as mda +from MDAnalysis.coordinates.H5MD import H5MDReader +import numpy as np +import ase.build +import pytest +import pathlib + +import znh5md + +@pytest.fixture +def trajectory() -> list[ase.Atoms]: + """Generate ase.Atoms objects that moves linearly in space.""" + water = ase.build.molecule("H2O") + atoms_list = [water] + while len(atoms_list) < 100: + atoms = atoms_list[-1].copy() + atoms.positions += [0.1, 0.1, 0.1] + atoms_list.append(atoms) + return atoms_list + +@pytest.fixture +def h5_trajectory(tmp_path, trajectory) -> pathlib.Path: + """Write the trajectory to an H5 file.""" + filename = tmp_path / "trajectory.h5" + db = znh5md.io.DataWriter(filename=filename) + db.initialize_database_groups() + reader = znh5md.io.AtomsReader(trajectory) + db.add(reader) + return filename + +def test_read_h5md(h5_trajectory): + u = mda.Universe.empty(3, n_residues=3, atom_resindex=np.arange(3), trajectory=True) + reader = H5MDReader(h5_trajectory, convert_units=False) + u.trajectory = reader + + assert len(u.trajectory) == 100 + diff --git a/znh5md/io/base.py b/znh5md/io/base.py index 0bdaf72..a593480 100644 --- a/znh5md/io/base.py +++ b/znh5md/io/base.py @@ -2,6 +2,7 @@ import dataclasses import logging import typing +import pathlib import h5py import numpy as np @@ -9,6 +10,7 @@ log = logging.getLogger(__name__) from znh5md.format import GRP, PARTICLES_GRP +import typing_extensions as te @dataclasses.dataclass @@ -124,6 +126,9 @@ def create_dataset(self, dataset_group: h5py.Group): value_ds = dataset_group.create_dataset( "value", maxshape=self.shape, data=self.value, chunks=True ) + # time_ds = dataset_group.create_dataset("time", data=np.arange(len(self.value)), chunks=True, maxshape=(None,)) + # dataset_group.create_dataset("step", data=np.arange(len(self.value)), chunks=True, maxshape=(None,)) + time_ds = dataset_group.create_dataset("time", data=self.time) dataset_group.create_dataset("step", data=self.step) @@ -140,6 +145,15 @@ def append_to_dataset(self, dataset_group: h5py.Group): dataset_group["value"][:] = np.concatenate( [dataset_group["value"][:n_current_frames], self.value] ) + # # append to time and step as well + # dataset_group["time"].resize(n_current_frames + len(self), axis=0) + # dataset_group["time"][:] = np.concatenate( + # [dataset_group["time"][:n_current_frames], np.arange(len(self.value))] + # ) + # dataset_group["step"].resize(n_current_frames + len(self), axis=0) + # dataset_group["step"][:] = np.concatenate( + # [dataset_group["step"][:n_current_frames], np.arange(len(self.value))] + # ) CHUNK_DICT = typing.Dict[str, ExplicitStepTimeChunk] @@ -181,18 +195,14 @@ class DataWriter: particles_path: str = "particles/atoms" observables_path: str = "observables/atoms" + @te.deprecated("DB will be initialized automatically when adding data.") def initialize_database_groups(self): """Create all groups that are required. We create the following groups: - particles/atoms """ - with h5py.File(self.filename, "w") as file: - particles = file.create_group("particles") - _ = particles.create_group("atoms") - - observables = file.create_group("observables") - _ = observables.create_group("atoms") + pass def _handle_special_cases_group_names(self, groupname: str) -> str: """Update group name in special cases. @@ -266,15 +276,25 @@ def add_chunk_data(self, **kwargs: CHUNK_DICT) -> None: kwargs: dict[str, ExplicitStepTimeChunk] The chunk data to write to the database. The key is the name of the group. """ + if not pathlib.Path(self.filename).exists(): + _ = h5py.File(self.filename, "w") # create the file with h5py.File(self.filename, "r+") as file: for group_name, chunk_data in kwargs.items(): if group_name == GRP.boundary: self.handle_boundary(file, chunk_data) else: if group_name in PARTICLES_GRP: - group_path = file[self.particles_path] + try: + group_path = file[self.particles_path] + except KeyError: + log.debug(f"creating particle groups {group_name}") + group_path = file.create_group(self.particles_path) else: - group_path = file[self.observables_path] + try: + group_path = file[self.observables_path] + except KeyError: + log.debug(f"creating observable groups {group_name}") + group_path = file.create_group(self.observables_path) try: self.add_data_to_group(group_path, group_name, chunk_data) From 7d2b59cb47d67b91666f5c2e51598af9caedffcf Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 10:27:45 +0200 Subject: [PATCH 05/11] fix time --- znh5md/io/base.py | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/znh5md/io/base.py b/znh5md/io/base.py index a593480..8b27132 100644 --- a/znh5md/io/base.py +++ b/znh5md/io/base.py @@ -126,11 +126,8 @@ def create_dataset(self, dataset_group: h5py.Group): value_ds = dataset_group.create_dataset( "value", maxshape=self.shape, data=self.value, chunks=True ) - # time_ds = dataset_group.create_dataset("time", data=np.arange(len(self.value)), chunks=True, maxshape=(None,)) - # dataset_group.create_dataset("step", data=np.arange(len(self.value)), chunks=True, maxshape=(None,)) - - time_ds = dataset_group.create_dataset("time", data=self.time) - dataset_group.create_dataset("step", data=self.step) + time_ds = dataset_group.create_dataset("time", data=np.arange(len(self.value)) * self.time, chunks=True, maxshape=(None,)) + dataset_group.create_dataset("step", data=np.arange(len(self.value)), chunks=True, maxshape=(None,)) if self.value_units is not None: value_ds.attrs["unit"] = self.value_units @@ -145,15 +142,15 @@ def append_to_dataset(self, dataset_group: h5py.Group): dataset_group["value"][:] = np.concatenate( [dataset_group["value"][:n_current_frames], self.value] ) - # # append to time and step as well - # dataset_group["time"].resize(n_current_frames + len(self), axis=0) - # dataset_group["time"][:] = np.concatenate( - # [dataset_group["time"][:n_current_frames], np.arange(len(self.value))] - # ) - # dataset_group["step"].resize(n_current_frames + len(self), axis=0) - # dataset_group["step"][:] = np.concatenate( - # [dataset_group["step"][:n_current_frames], np.arange(len(self.value))] - # ) + # append to time and step as well + dataset_group["time"].resize(n_current_frames + len(self), axis=0) + dataset_group["time"][:] = np.concatenate( + [dataset_group["time"][:n_current_frames], np.arange(len(self.value)) * self.time + n_current_frames * self.time] + ) + dataset_group["step"].resize(n_current_frames + len(self), axis=0) + dataset_group["step"][:] = np.concatenate( + [dataset_group["step"][:n_current_frames], np.arange(len(self.value)) + n_current_frames] + ) CHUNK_DICT = typing.Dict[str, ExplicitStepTimeChunk] From bfde7e2dac5263cbd9490ab8fbeac5d0a759f446 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 May 2024 08:27:57 +0000 Subject: [PATCH 06/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_mdanalysis.py | 11 +++++++---- znh5md/io/base.py | 28 +++++++++++++++++++++------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/tests/test_mdanalysis.py b/tests/test_mdanalysis.py index 346c1db..85e6434 100644 --- a/tests/test_mdanalysis.py +++ b/tests/test_mdanalysis.py @@ -1,13 +1,15 @@ """Use the MDAnalysis library to read H5 files and check compliance with the H5MD standard.""" +import pathlib + +import ase.build import MDAnalysis as mda -from MDAnalysis.coordinates.H5MD import H5MDReader import numpy as np -import ase.build import pytest -import pathlib +from MDAnalysis.coordinates.H5MD import H5MDReader import znh5md + @pytest.fixture def trajectory() -> list[ase.Atoms]: """Generate ase.Atoms objects that moves linearly in space.""" @@ -19,6 +21,7 @@ def trajectory() -> list[ase.Atoms]: atoms_list.append(atoms) return atoms_list + @pytest.fixture def h5_trajectory(tmp_path, trajectory) -> pathlib.Path: """Write the trajectory to an H5 file.""" @@ -29,10 +32,10 @@ def h5_trajectory(tmp_path, trajectory) -> pathlib.Path: db.add(reader) return filename + def test_read_h5md(h5_trajectory): u = mda.Universe.empty(3, n_residues=3, atom_resindex=np.arange(3), trajectory=True) reader = H5MDReader(h5_trajectory, convert_units=False) u.trajectory = reader assert len(u.trajectory) == 100 - diff --git a/znh5md/io/base.py b/znh5md/io/base.py index dd24afe..559595d 100644 --- a/znh5md/io/base.py +++ b/znh5md/io/base.py @@ -1,17 +1,18 @@ import abc import dataclasses import logging -import typing import pathlib +import typing import h5py import numpy as np log = logging.getLogger(__name__) -from znh5md.format import GRP, PARTICLES_GRP import typing_extensions as te +from znh5md.format import GRP, PARTICLES_GRP + @dataclasses.dataclass class StepTimeChunk: @@ -126,8 +127,15 @@ def create_dataset(self, dataset_group: h5py.Group): value_ds = dataset_group.create_dataset( "value", maxshape=self.shape, data=self.value, chunks=True ) - time_ds = dataset_group.create_dataset("time", data=np.arange(len(self.value)) * self.time, chunks=True, maxshape=(None,)) - dataset_group.create_dataset("step", data=np.arange(len(self.value)), chunks=True, maxshape=(None,)) + time_ds = dataset_group.create_dataset( + "time", + data=np.arange(len(self.value)) * self.time, + chunks=True, + maxshape=(None,), + ) + dataset_group.create_dataset( + "step", data=np.arange(len(self.value)), chunks=True, maxshape=(None,) + ) if self.value_units is not None: value_ds.attrs["unit"] = self.value_units @@ -145,11 +153,17 @@ def append_to_dataset(self, dataset_group: h5py.Group): # append to time and step as well dataset_group["time"].resize(n_current_frames + len(self), axis=0) dataset_group["time"][:] = np.concatenate( - [dataset_group["time"][:n_current_frames], np.arange(len(self.value)) * self.time + n_current_frames * self.time] + [ + dataset_group["time"][:n_current_frames], + np.arange(len(self.value)) * self.time + n_current_frames * self.time, + ] ) dataset_group["step"].resize(n_current_frames + len(self), axis=0) dataset_group["step"][:] = np.concatenate( - [dataset_group["step"][:n_current_frames], np.arange(len(self.value)) + n_current_frames] + [ + dataset_group["step"][:n_current_frames], + np.arange(len(self.value)) + n_current_frames, + ] ) @@ -274,7 +288,7 @@ def add_chunk_data(self, **kwargs: CHUNK_DICT) -> None: The chunk data to write to the database. The key is the name of the group. """ if not pathlib.Path(self.filename).exists(): - _ = h5py.File(self.filename, "w") # create the file + _ = h5py.File(self.filename, "w") # create the file with h5py.File(self.filename, "r+") as file: for group_name, chunk_data in kwargs.items(): if group_name == GRP.boundary: From debbc4a3cc8d1e723d493f707c023b6ea8db912d Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 10:35:10 +0200 Subject: [PATCH 07/11] test positions --- tests/test_mdanalysis.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_mdanalysis.py b/tests/test_mdanalysis.py index 85e6434..c9a740b 100644 --- a/tests/test_mdanalysis.py +++ b/tests/test_mdanalysis.py @@ -33,9 +33,10 @@ def h5_trajectory(tmp_path, trajectory) -> pathlib.Path: return filename -def test_read_h5md(h5_trajectory): - u = mda.Universe.empty(3, n_residues=3, atom_resindex=np.arange(3), trajectory=True) - reader = H5MDReader(h5_trajectory, convert_units=False) +def test_read_h5md(h5_trajectory, trajectory): + u = mda.Universe.empty(n_atoms=3, trajectory=True) + reader = H5MDReader(h5_trajectory) u.trajectory = reader - assert len(u.trajectory) == 100 + for ref, ts in zip(trajectory, u.trajectory): + assert np.allclose(ref.positions, ts.positions) From 1ec48e0e6c24b58dab40a56d0df880874d0ce9a4 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 10:36:33 +0200 Subject: [PATCH 08/11] remove `initialize_database_groups` --- README.md | 1 - examples/ase.ipynb | 2 -- tests/test_ASEH5MD.py | 3 --- tests/test_benchmarks.py | 2 -- tests/test_custom_file_handle.py | 1 - tests/test_mdanalysis.py | 1 - tests/test_readers.py | 2 -- tests/test_writing.py | 1 - znh5md/cli.py | 3 --- 9 files changed, 16 deletions(-) diff --git a/README.md b/README.md index f8b5204..33bc20c 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,6 @@ import ase atoms: list[ase.Atoms] db = znh5md.io.DataWriter(filename="db.h5") -db.initialize_database_groups() db.add(znh5md.io.AtomsReader(atoms)) # or znh5md.io.ChemfilesReader diff --git a/examples/ase.ipynb b/examples/ase.ipynb index ddc0c7d..0ccd8f9 100644 --- a/examples/ase.ipynb +++ b/examples/ase.ipynb @@ -69,7 +69,6 @@ "source": [ "%%time\n", "db = znh5md.io.DataWriter(filename=\"db.h5\")\n", - "db.initialize_database_groups()\n", "db.add(znh5md.io.AtomsReader(atoms_list, step=1, time=0.1))" ] }, @@ -253,7 +252,6 @@ ], "source": [ "db = znh5md.io.DataWriter(filename=\"from_file.h5\")\n", - "db.initialize_database_groups()\n", "db.add(znh5md.io.ASEFileReader(\"traj.xyz\", step=1, time=0.1))" ] }, diff --git a/tests/test_ASEH5MD.py b/tests/test_ASEH5MD.py index 4df907e..f51eab1 100644 --- a/tests/test_ASEH5MD.py +++ b/tests/test_ASEH5MD.py @@ -29,7 +29,6 @@ def test_get_slice(tmp_path, atoms_list, remove_calc): atoms.calc = None db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() db.add(znh5md.io.AtomsReader(atoms_list)) traj = znh5md.ASEH5MD("db.h5") @@ -56,7 +55,6 @@ def test_request_missing_properties(tmp_path, atoms_list, remove_calc): atoms.calc = None db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() if remove_calc: with pytest.raises(RuntimeError): @@ -77,7 +75,6 @@ def test_skip_property(tmp_path, atoms_list): os.chdir(tmp_path) db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() atoms_list[-1].arrays.pop("momenta") atoms_list[-1].get_momenta() diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index a423ffa..303b5aa 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -6,7 +6,6 @@ def test_bench_DataWriter(tmp_path, atoms_list, benchmark): os.chdir(tmp_path) db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() reader = znh5md.io.AtomsReader(atoms_list) benchmark(db.add, reader) @@ -14,7 +13,6 @@ def test_bench_DataWriter(tmp_path, atoms_list, benchmark): def test_bench_ASEH5MD(tmp_path, atoms_list, benchmark): os.chdir(tmp_path) db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() reader = znh5md.io.AtomsReader(atoms_list) db.add(reader) diff --git a/tests/test_custom_file_handle.py b/tests/test_custom_file_handle.py index 91dbd7d..7bc289e 100644 --- a/tests/test_custom_file_handle.py +++ b/tests/test_custom_file_handle.py @@ -12,7 +12,6 @@ def test_AtomsReader(tmp_path, atoms_list): print(tmp_path) db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() reader = znh5md.io.AtomsReader(atoms_list, frames_per_chunk=10) db.add(reader) diff --git a/tests/test_mdanalysis.py b/tests/test_mdanalysis.py index c9a740b..5d14113 100644 --- a/tests/test_mdanalysis.py +++ b/tests/test_mdanalysis.py @@ -27,7 +27,6 @@ def h5_trajectory(tmp_path, trajectory) -> pathlib.Path: """Write the trajectory to an H5 file.""" filename = tmp_path / "trajectory.h5" db = znh5md.io.DataWriter(filename=filename) - db.initialize_database_groups() reader = znh5md.io.AtomsReader(trajectory) db.add(reader) return filename diff --git a/tests/test_readers.py b/tests/test_readers.py index 1a7bc75..cb365dc 100644 --- a/tests/test_readers.py +++ b/tests/test_readers.py @@ -17,7 +17,6 @@ def test_AtomsReader(tmp_path, reader, atoms_list, use_add): print(tmp_path) db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() if reader == znh5md.io.AtomsReader: inputs = atoms_list @@ -72,7 +71,6 @@ def test_ChemfilesReader(tmp_path, atoms_list, frames_per_chunk): print(tmp_path) db = znh5md.io.DataWriter(filename="db.h5") - db.initialize_database_groups() inputs = "traj.xyz" ase.io.write(inputs, atoms_list) diff --git a/tests/test_writing.py b/tests/test_writing.py index bc42b8f..a9eadab 100644 --- a/tests/test_writing.py +++ b/tests/test_writing.py @@ -13,7 +13,6 @@ def test_DataWriter(tmp_path, atoms_list): os.chdir(tmp_path) db = DataWriter(filename="db.h5") - db.initialize_database_groups() reader = AtomsReader(atoms_list) diff --git a/znh5md/cli.py b/znh5md/cli.py index afcff8a..a9c9184 100644 --- a/znh5md/cli.py +++ b/znh5md/cli.py @@ -36,9 +36,6 @@ def convert(file: str, db_file: str): import znh5md db = znh5md.io.DataWriter(db_file) - if not pathlib.Path(db_file).exists(): - db.initialize_database_groups() - db.add(znh5md.io.ASEFileReader(file)) From 42885b1f08ea384fc8ea7408a09b5d55262e1658 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 13:11:05 +0200 Subject: [PATCH 09/11] bump version to 0.2.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e96a88d..6b26d09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "znh5md" -version = "0.1.9" +version = "0.2.0" description = "High Performance Interface for H5MD Trajectories" authors = ["zincwarecode "] license = "Apache-2.0" From eaf6661833e580cdb2f1bfa433cd5e87368e30e9 Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 13:15:49 +0200 Subject: [PATCH 10/11] test multiple python versions --- .github/workflows/pytest.yaml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index acccafd..17e51b2 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -9,30 +9,39 @@ on: - cron: '14 3 * * 1' # at 03:14 on Monday. jobs: - test: - - runs-on: ubuntu-latest + pytest: + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: + - "3.12" + - "3.11" - "3.10" + - "3.9" + os: + - ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install Poetry - uses: abatilo/actions-poetry@v2 + uses: snok/install-poetry@v1 with: - poetry-version: 1.3.2 + virtualenvs-create: true + virtualenvs-in-project: true + - name: Install package + run: | + poetry install --no-interaction --without=notebook --all-extras - name: Install package run: | poetry install --without=notebook --all-extras - name: Pytest run: | + poetry run python --version poetry run coverage run -m pytest poetry run coverage lcov - name: Coveralls From 3a30f69aff0b37ef9a0807e81ebce3abc3ecfcad Mon Sep 17 00:00:00 2001 From: PythonFZ Date: Fri, 17 May 2024 13:16:56 +0200 Subject: [PATCH 11/11] bump version test --- tests/test_znh5md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_znh5md.py b/tests/test_znh5md.py index 2df752f..920105c 100644 --- a/tests/test_znh5md.py +++ b/tests/test_znh5md.py @@ -5,7 +5,7 @@ def test_version(): - assert znh5md.__version__ == "0.1.9" + assert znh5md.__version__ == "0.2.0" def test_shape(example_h5):