From 3fc5c7d22be68bc428981d7b6563b339f9ca1e2b Mon Sep 17 00:00:00 2001 From: FNTwin Date: Fri, 12 Jul 2024 16:20:39 -0400 Subject: [PATCH 1/2] Normalization entry doc, regressor API --- docs/API/regressor.md | 2 +- docs/e0s_and_qm.md | 16 +++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/API/regressor.md b/docs/API/regressor.md index cd1a496..dff0ad9 100644 --- a/docs/API/regressor.md +++ b/docs/API/regressor.md @@ -1 +1 @@ -::: openqdc.utils.regressor \ No newline at end of file +::: openqdc.utils.regressor diff --git a/docs/e0s_and_qm.md b/docs/e0s_and_qm.md index f3f2acf..37a9f72 100644 --- a/docs/e0s_and_qm.md +++ b/docs/e0s_and_qm.md @@ -1,7 +1,7 @@ # Overview of QM Methods and Normalization OpenQDC provides support for 250+ QM Methods and provides a way to standardize and categorize -the usage of different level of theories used for Quantum Mechanics Single Point Calculations +the usage of different level of theories used for Quantum Mechanics Single Point Calculations to add value and information to the datasets. ## Level of Theory @@ -17,16 +17,22 @@ OpenQDC provides the computed the isolated atom energies `e0` for each QM method We provide support of energies through "physical" and "regression" normalization to conserve the size extensivity of chemical systems. -OpenQDC through this normalization, provide a way to transform the potential energy to atomization energy by subtracting isolated atom energies `e0` +OpenQDC through this normalization, provide a way to transform the potential energy to atomization energy by subtracting isolated atom energies `e0` physically interpretable and extensivity-conserving normalization method. Alternatively, we pre-335 compute the average contribution of each atom species to potential energy via linear or ridge336 -regression, centering the distribution at 0 and providing uncertainty estimation for the computed337 -values. 
Predicted atomic energies can also be scaled to approximate a standard normal distribution +regression, centering the distribution at 0 and providing uncertainty estimation for the computed +values. Predicted atomic energies can also be scaled to approximate a standard normal distribution. ### Physical Normalization +`e0` energies are calculated for each atom in the dataset at the appropriate level of theory and then subtracted from +the potential energy to obtain the atomization energy. This normalization method is physically interpretable and +only removes the atom energy contribution from the potential energy. ### Regression Normalization - +`e0` energies are calculated for each atom in the dataset from fitting a regression model to the potential energy. +The `e0` energies are then subtracted from the potential energy to obtain the atomization energy. This normalization +provides uncertainty estimation for the computed values and removes part of the interatomic energy contribution from the potential energy. +The resulting formation energy is centered at 0. From 44fcf163e3b829b6fafa71208ef4178696304591 Mon Sep 17 00:00:00 2001 From: FNTwin Date: Fri, 12 Jul 2024 16:29:23 -0400 Subject: [PATCH 2/2] circular import removal for tests --- openqdc/utils/regressor.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/openqdc/utils/regressor.py b/openqdc/utils/regressor.py index f9d5199..a980ef5 100644 --- a/openqdc/utils/regressor.py +++ b/openqdc/utils/regressor.py @@ -7,8 +7,6 @@ import pandas as pd from loguru import logger -from openqdc.datasets.base import BaseDataset - def non_nan_idxs(array): """ @@ -119,7 +117,7 @@ def __init__( self._post_init() @classmethod - def from_openqdc_dataset(cls, dataset: BaseDataset, *args, **kwargs) -> "Regressor": + def from_openqdc_dataset(cls, dataset, *args, **kwargs) -> "Regressor": """ Initialize the regressor object from an openqdc dataset. This is the default method. 
*args and and **kwargs are passed to the __init__ method and depends on the specific regressor.