diff --git a/src/openqdc/datasets/base.py b/src/openqdc/datasets/base.py index 6c939e7..16dc892 100644 --- a/src/openqdc/datasets/base.py +++ b/src/openqdc/datasets/base.py @@ -132,13 +132,16 @@ def _precompute_E(self): s = np.array(self.data["atomic_inputs"][:, :2], dtype=int) s[:, 1] += IsolatedAtomEnergyFactory.max_charge matrixs = [matrix[s[:, 0], s[:, 1]] for matrix in self.__isolated_atom_energies__] - matrixs = [np.split(matrix, splits_idx)[:-1] for matrix in matrixs] + # matrixs = [np.split(matrix, splits_idx)[:-1] for matrix in matrixs] converted_energy_data = self.convert_energy(self.data["energies"]) # calculation per molecule formation energy statistics - e = [] - for i in range(len(self.__energy_methods__)): - e.append(converted_energy_data[:, i] - np.array(list(map(lambda x: x.sum(), matrixs[i])))) - E = np.array(e).T + n = len(self.__energy_methods__) + E = [] + for i, matrix in enumerate(matrixs): + c = np.cumsum(np.append([0], matrix))[splits_idx] + c[1:] = c[1:] - c[:-1] + E.append(converted_energy_data[:, i] -c) + E = np.array(E).T formation_E_mean = np.nanmean(E, axis=0) formation_E_std = np.nanstd(E, axis=0) total_E_mean = np.nanmean(converted_energy_data, axis=0)