From fc4ce9afef0451e0997607819a68a478d83c3d0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Semih=20Cant=C3=BCrk?= Date: Fri, 24 Jan 2025 09:00:15 -0500 Subject: [PATCH] Revert "add dipeptides dataset" This reverts commit ba1430a96dea47a134cd89231a43406f273f400c. --- openqdc/datasets/potential/__init__.py | 2 - openqdc/datasets/potential/dipeptides.py | 81 ------------------------ 2 files changed, 83 deletions(-) delete mode 100644 openqdc/datasets/potential/dipeptides.py diff --git a/openqdc/datasets/potential/__init__.py b/openqdc/datasets/potential/__init__.py index a4ead5a..7beba8c 100644 --- a/openqdc/datasets/potential/__init__.py +++ b/openqdc/datasets/potential/__init__.py @@ -2,7 +2,6 @@ from .ani import ANI1, ANI1CCX, ANI1CCX_V2, ANI1X, ANI2X from .bpa import BPA from .comp6 import COMP6 -from .dipeptides import Dipeptides from .dummy import Dummy, PredefinedDataset from .gdml import GDML from .geom import GEOM @@ -38,7 +37,6 @@ "ANI2X": ANI2X, "BPA": BPA, "COMP6": COMP6, - "Dipeptides": Dipeptides, "GDML": GDML, "GEOM": GEOM, "ISO17": ISO17, diff --git a/openqdc/datasets/potential/dipeptides.py b/openqdc/datasets/potential/dipeptides.py deleted file mode 100644 index 2d8d9eb..0000000 --- a/openqdc/datasets/potential/dipeptides.py +++ /dev/null @@ -1,81 +0,0 @@ -import numpy as np -from openqdc.datasets.base import BaseDataset -from openqdc.methods import PotentialMethod - -def shape_atom_inputs(coords, atom_species): - reshaped_coords = coords.reshape(-1, 3) - frame, atoms, _ = coords.shape - z = np.tile(atom_species, frame) - xs = np.stack((z, np.zeros_like(z)), axis=-1) - return np.concatenate((xs, reshaped_coords), axis=-1, dtype=np.float32) - - -def read_npz_entry(folder): - data, name = create_path(folder) - data = np.load(data) - - nuclear_charges, coords, energies, forces = ( - data["nuclear_charges"], - data["coords"], - data["energies"], - data["forces"], - ) - frames = coords.shape[0] - res = dict( - name=np.array([name] * frames), - subset=np.array(["dipeptides"] * frames), - energies=energies[:, None].astype(np.float32), - forces=forces.reshape(-1, 3, 1).astype(np.float32), - atomic_inputs=shape_atom_inputs(coords, nuclear_charges), - n_atoms=np.array([len(nuclear_charges)] * frames, dtype=np.int32), - ) - return res - - -def create_path(folder): - name = folder.split("/")[-1] - return folder, name - -folder="/network/scratch/s/semih.canturk/cache/openqdc/dipeptides/npz_files/mol_73.npz" - -trajectories={ - "mol_73": folder -} - -class Dipeptides(BaseDataset): - """ - """ - - __name__ = "dipeptides" - - __energy_methods__ = [PotentialMethod.WB97M_D3BJ_DEF2_TZVPPD] - - energy_target_names = [ - "", - ] - - __energy_unit__ = "kj/mol" - __distance_unit__ = "ang" - __forces_unit__ = "kj/mol/ang" - - __force_mask__ = [False] - - @property - def data_types(self): - return { - "atomic_inputs": np.float32, - "position_idx_range": np.int32, - "energies": np.float32, - "forces": np.float32, - } - - def read_raw_entries(self): - entries_list = [] - - for dummy_name, path_to_npz in trajectories.items(): - entries_list.append(read_npz_entry(path_to_npz)) - return entries_list - - -# to store it in the cache and loading back (add the dataset in the __init__) -# Dipeptides.no_init().preprocess(upload=False, overwrite=True) \ No newline at end of file