Skip to content

Commit

Permalink
Update outlier removal to use the average formation energy per atom
Browse files (browse the repository at this point in the history)
  • Loading branch information
mcneela committed Feb 22, 2024
1 parent 9c1010a commit 3d1cb52
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion src/openqdc/datasets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ def _remove_outliers(
f"{avg_fn} is not a valid option, should be one of {list(BaseDataset.avg_options.keys())}"
)
logger.info(f"Removing outliers outside {avg_fn} +/- {num_stds} stds")
formation_E /= self.data["n_atoms"] # convert to avg formation energy / atom
formation_E = np.squeeze(formation_E.T) # remove extra array dimension and transpose
fn = BaseDataset.avg_options[avg_fn]
mid = fn(formation_E)
mask = np.logical_or(formation_E < mid - num_stds * formation_E.std(), formation_E > mid + num_stds * formation_E.std())
Expand Down Expand Up @@ -232,7 +234,7 @@ def _precompute_E(self):

# remove outliers if requested in __init__
if self.remove_outliers:
E = self._remove_outliers(np.squeeze(E.T),
E = self._remove_outliers(E,
avg_fn=self.avg_fn,
num_stds=self.num_stds)

Expand Down

0 comments on commit 3d1cb52

Please sign in to comment.