diff --git a/src/nomad_simulations/schema_packages/atoms_state.py b/src/nomad_simulations/schema_packages/atoms_state.py index 32fbdd11..790df1e7 100644 --- a/src/nomad_simulations/schema_packages/atoms_state.py +++ b/src/nomad_simulations/schema_packages/atoms_state.py @@ -641,3 +641,105 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: self.chemical_symbol = self.resolve_chemical_symbol(logger=logger) if self.atomic_number is None: self.atomic_number = self.resolve_atomic_number(logger=logger) + + +class MolecularOrbitals(Entity): + """ + This class stores all molecular orbitals (MO) in a single container, with each Quantity using + arrays indexed by mo_num and ao_num. + + Comparison to TREXIO: + - mo/type -> mo_type + - mo/num -> mo_num + - mo/coefficient -> coefficient + - mo/coefficient_im -> coefficient_im + - mo/symmetry -> symmetry + - mo/occupation -> occupation + - mo/energy -> energy + - mo/spin -> spin + + """ + + mo_num = Quantity( + type=np.int32, + description=""" + Number of molecular orbitals. + """, + ) + + ao_num = Quantity( + type=np.int32, + description=""" + Number of atomic orbitals or basis functions (often needed for coefficient shape). + Corresponds to the 'ao.num' dimension in TREXIO. + """, + ) + + mo_type = Quantity( + type=str, + shape=['mo_num'], + description=""" + Type of the molecular orbitals + e.g. 'canonical', 'localized'. + In case of CASSCF calculations, there will be orbital subspaces of different nature. + E.g. : + Internal orbitals : canonical + Active orbitals : natural + Virtual orbitals : canonical + """, + ) + + coefficient = Quantity( + type=np.float64, + shape=['mo_num', 'ao_num'], + description=""" + Real part of the MO coefficients. The shape is + [mo.num, ao.num], meaning each row corresponds to one MO, and each column + to one atomic orbital (or basis function). + """, + ) + + coefficient_im = Quantity( + type=np.float64, + shape=['mo_num', 'ao_num'], + description=""" + Imaginary part of the MO coefficients. The shape is + [mo.num, ao.num]. This array may be omitted or set to zero if the orbitals + are purely real. + """, + ) + + symmetry = Quantity( + type=str, + shape=['mo_num'], + description=""" + Symmetry label for each MO, e.g. group-theory labels or + simpler 'sigma', 'pi', 'delta'. + """, + ) + + occupation = Quantity( + type=np.float64, + shape=['mo_num'], + description=""" + Occupation numbers for each MO. Typically in [0, 2] + for closed-shell systems, but might be fractional in open-shell systems or multi-reference calculations. + """, + ) + + energy = Quantity( + type=np.float64, + shape=['mo_num'], + description=""" + Orbital energies for each MO. + """, + ) + + spin = Quantity( + type=np.int32, + shape=['mo_num'], + description=""" + Spin channel for each MO if this is an unrestricted open-shell set. + Typically 0 for alpha, 1 for beta. + """, + ) diff --git a/src/nomad_simulations/schema_packages/basis_set.py b/src/nomad_simulations/schema_packages/basis_set.py index bbd763e0..536a39dd 100644 --- a/src/nomad_simulations/schema_packages/basis_set.py +++ b/src/nomad_simulations/schema_packages/basis_set.py @@ -13,7 +13,7 @@ from nomad import utils from nomad.datamodel.data import ArchiveSection from nomad.datamodel.metainfo.annotations import ELNAnnotation -from nomad.metainfo import MEnum, Quantity, SubSection +from nomad.metainfo import JSON, MEnum, Quantity, SubSection from nomad.units import ureg from nomad_simulations.schema_packages.atoms_state import AtomsState @@ -187,28 +187,253 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: class AtomCenteredFunction(ArchiveSection): """ - Specifies a single function (term) in an atom-centered basis set. + Specifies a single contracted basis function in an atom-centered basis set. + + In many quantum-chemistry codes, an atom-centered basis set is composed of + several "shells," each shell containing one or more basis functions of a certain + angular momentum. For instance, a shell of p-type orbitals (L=1) typically + consists of 3 degenerate functions (p_x, p_y, p_z) if `harmonic_type='cartesian'` + or 3 spherical harmonics if `harmonic_type='spherical'`. + + A single "atom-centered function" can be a linear combination of multiple + primitive Gaussians (or Slater-type orbitals, STOs). + In practice, these contract together to form the final basis function used by + the SCF or post-SCF method. Often, each contraction is labeled by its + angular momentum (e.g., s, p, d, f) and a set of exponents and coefficients. + + **References**: + - T. Helgaker, P. Jørgensen, J. Olsen, *Molecular Electronic-Structure Theory*, Wiley (2000). + - F. Jensen, *Introduction to Computational Chemistry*, 2nd ed., Wiley (2007). + - J. B. Foresman, Æ. Frisch, *Exploring Chemistry with Electronic Structure Methods*, Gaussian Inc. """ - pass + harmonic_type = Quantity( + type=MEnum( + 'spherical', + 'cartesian', + ), + default='spherical', + description=""" + Specifies whether the basis functions are expanded in **spherical** (pure) + harmonics or **cartesian** harmonics. Many modern quantum-chemistry codes + default to *spherical harmonics* for d, f, g..., which eliminates the + redundant functions found in the cartesian sets. + + - `'spherical'` : (2l+1) functions for a shell of angular momentum l + - `'cartesian'` : (l+1)(l+2)/2 functions for that shell (extra functions appear) + """, + ) + + function_type = Quantity( + type=MEnum( + 's', + 'p', + 'd', + 'f', + 'g', + 'h', + 'i', + 'j', + 'k', + 'l', + 'sp', + 'spd', + 'spdf', + ), + description=""" + Symbolic label for the **angular momentum** of this contracted function. + Typical single-letter labels: + - 's' => L=0 + - 'p' => L=1 + - 'd' => L=2 + - 'f' => L=3 + - 'g' => L=4 + - 'h', 'i', etc. => still higher angular momenta + Combined labels like 'sp' or 'spdf' indicate a **combined shell** in which + multiple angular momenta share exponents. For example, in some older Pople + basis sets, an 'sp' shell has an s- and p-type function sharing the same + exponents but different contraction coefficients. + """, + ) + + n_primitive = Quantity( + type=np.int32, + description=""" + Number of **primitive** functions in this contracted basis function. + For example, in a contracted Gaussian-type orbital (GTO) approach, each basis + function might be built from a sum of `n_primitive` Gaussians with different + exponents, each scaled by a contraction coefficient. + """, + ) + + exponents = Quantity( + type=np.float32, + shape=['n_primitive'], + description=""" + The **exponents** of each primitive basis function. + In a Gaussian basis set (GTO), these are the alpha_i in + exp(-alpha_i * r^2). In a Slater-type basis (STO), they'd be + exp(-zeta_i * r). Typically sorted from largest to smallest. + """, + ) + + contraction_coefficients = Quantity( + type=np.float32, + shape=['*'], # Flexible shape to handle combined types (e.g. SP, SPD..) + description=""" + The **contraction coefficients** associated with each primitive exponent. + In the simplest case (pure s- or p-function), this array has length + equal to `n_primitive`. For combined shells (like 'sp'), the length + might be `2 * n_primitive`, because you have separate coefficients + for the s-part and the p-part. + """, + ) + + point_charge = Quantity( + type=np.float32, + description=""" + If using a basis function that explicitly includes a point-charge or an + ECP-like pseudo-component, this field can store that net charge. + Typically 0 for standard GTO or STO expansions. + Some extended basis concepts (or embedded charges) might set it differently. + """, + ) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + """ + Validates the input data + and resolves combined types like SP, SPD, SPDF, etc. + + Raises ValueError: If the data is inconsistent (e.g., mismatch in exponents and coefficients). + """ + super().normalize(archive, logger) - # TODO: design system for writing basis functions like gaussian or slater orbitals + # Validate number of primitives + if self.n_primitive is not None: + if self.exponents is not None and len(self.exponents) != self.n_primitive: + raise ValueError( + f'Mismatch in number of exponents: expected {self.n_primitive}, ' + f'found {len(self.exponents)}.' + ) + + # For combined shells (like 'sp', 'spd', etc.), ensure the coefficient array is large enough + if self.function_type and len(self.function_type) > 1: + num_types = len(self.function_type) # For SP: 2, SPD: 3, etc. + if self.contraction_coefficients is not None: + expected_coeffs = num_types * self.n_primitive + if len(self.contraction_coefficients) != expected_coeffs: + raise ValueError( + f'Mismatch in contraction coefficients for {self.function_type} type: ' + f'expected {expected_coeffs}, found {len(self.contraction_coefficients)}.' + ) + + # Split coefficients into separate lists for each type + self.coefficient_sets = { + t: self.contraction_coefficients[i::num_types] + for i, t in enumerate(self.function_type) + } + + # Debug: Log split coefficients + for t, coeffs in self.coefficient_sets.items(): + logger.info(f'{t}-type coefficients: {coeffs}') + else: + logger.warning( + f'No contraction coefficients provided for {self.function_type} type.' + ) + + # For single types, ensure coefficients match primitives + elif self.contraction_coefficients is not None: + if len(self.contraction_coefficients) != self.n_primitive: + raise ValueError( + f'Mismatch in contraction coefficients: expected {self.n_primitive}, ' + f'found {len(self.contraction_coefficients)}.' + ) class AtomCenteredBasisSet(BasisSetComponent): """ - Defines an atom-centered basis set. + Defines an **atom-centered basis set** for quantum chemistry calculations. + Unlike plane-wave methods, these expansions are typically built around each atom's + position, using either: + - Slater-type orbitals (STO) + - Gaussian-type orbitals (GTO) + - Numerical atomic orbitals (NAO) + - Effective-core potentials or point-charges (PC, cECP, etc.) + + This section references multiple `AtomCenteredFunction` objects, each describing + a single contracted function or shell. Additionally, one can label the overall + basis set name (e.g., "cc-pVTZ", "def2-SVP", "6-31G**") and specify the high-level + role of the basis set in the calculation. + + **Common examples**: + - **Pople basis** (3-21G, 6-31G(d), 6-311++G(2df,2pd), etc.) + - **Dunning correlation-consistent** (cc-pVDZ, cc-pVTZ, aug-cc-pVTZ, etc.) + - **Slater basis** used in ADF, for instance + - **ECP** (Effective Core Potential) expansions like LANL2DZ or SDD for transition metals + + **References**: + - F. Jensen, *Introduction to Computational Chemistry*, 2nd ed., Wiley (2007). + - A. Szabo, N. S. Ostlund, *Modern Quantum Chemistry*, McGraw-Hill (1989). + - T. H. Dunning Jr., J. Chem. Phys. 90, 1007 (1989) for correlation-consistent basis sets. """ + basis_set = Quantity( + type=str, + description=""" + **Name** or label of the basis set as recognized by the code or standard + library. Examples: "6-31G*", "cc-pVTZ", "def2-SVP", "STO-3G", "LANL2DZ" (ECP). + """, + ) + + type = Quantity( + type=MEnum( + 'STO', # Slater-type orbitals + 'GTO', # Gaussian-type orbitals + 'NAO', # Numerical atomic orbitals + 'cECP', # Capped effective core potentials + 'PC', # Point charges + ), + description=""" + The **functional form** of the basis set: + - 'STO': Slater-type orbitals + - 'GTO': Gaussian-type orbitals + - 'NAO': Numerical atomic orbitals + - 'cECP': Some variant of a "capped" or shape-consistent ECP + - 'PC': Point charges (or ghost basis centers) + + If a code uses a mixture (e.g., GTO + ECP), it might either store them + as separate `AtomCenteredBasisSet` sections or unify them if the code does so internally. + """, + ) + + role = Quantity( + type=MEnum( + 'orbital', + 'auxiliary_scf', + 'auxiliary_post_hf', + 'cabs', + ), + description=""" + The role of this basis set in the calculation: + - 'orbital': main orbital basis for the SCF + - 'auxiliary_scf': used for RI-J or density fitting in SCF + - 'auxiliary_post_hf': used in MP2, CC, etc. + - 'cabs': complementary auxiliary basis for explicitly correlated (F12) methods. + """, + ) + + total_number_of_basis_functions = Quantity( + type=np.int32, + description=""" + The **total** number of contracted basis functions in this entire set. + This is typically the sum of all `(2l+1)` or cartesian expansions across + all shells on all relevant atoms (within the scope of this section). + """, + ) + functional_composition = SubSection( sub_section=AtomCenteredFunction.m_def, repeats=True - ) # TODO change name - - def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: - super().normalize(archive, logger) - # self.name = self.m_def.name - # TODO: set name based on basis functions - # ? use basis set names from Basis Set Exchange + ) class APWBaseOrbital(ArchiveSection): diff --git a/src/nomad_simulations/schema_packages/model_method.py b/src/nomad_simulations/schema_packages/model_method.py index c7b143ff..c0e20aa1 100644 --- a/src/nomad_simulations/schema_packages/model_method.py +++ b/src/nomad_simulations/schema_packages/model_method.py @@ -11,7 +11,11 @@ from nomad.metainfo import Context from structlog.stdlib import BoundLogger -from nomad_simulations.schema_packages.atoms_state import CoreHole, OrbitalsState +from nomad_simulations.schema_packages.atoms_state import ( + CoreHole, + MolecularOrbitals, + OrbitalsState, +) from nomad_simulations.schema_packages.model_system import ModelSystem from nomad_simulations.schema_packages.numerical_settings import NumericalSettings from nomad_simulations.schema_packages.utils import is_not_representative @@ -1221,3 +1225,286 @@ class DMFT(ModelMethodElectronic): def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + + +class IntegralDecomposition(BaseModelMethod): + """ + A general class for integral decomposition techniques that approximate + Coulomb and/or exchange integrals to reduce computational cost in quantum + chemistry. Examples include: + + - Resolution of the Identity (RI, a.k.a. density fitting) + - Chain-of-Spheres exchange (COSX) + - Cholesky Decomposition (CD) + - Other domain-based or rank-reduced approximations + + Typical references: + - F. Weigend, M. Häser, The RI-MP2 method: Algorithmic + implementation of efficient, approximate MP2 theories, + Theor. Chem. Acc. 97, 331-340 (1997). + - S. Hättig, F. Weigend, J. Chem. Phys. 113, 5154 (2000). (RI-J) + - Neese et al., “Chain-of-spheres algorithms for HF exchange,” + Chem. Phys. 356 (2008), 98-109. + """ + + approximation_type = Quantity( + type=MEnum('RIJ', 'RIJK', 'RIK', 'SENEX', 'RIJCOSX'), + description=""" + RIJ : also known as RIJONX, where only Coulomb integrals are approximated. + RIJK : both Coulomb and exchange integrals. + RIJCOSX : RIJ for Coulomb and COSX for HF exchange. + SENEX : Similar to COSX, relevant for Turbomole. + """, + ) + + approximated_term = Quantity( + type=MEnum('coulomb', 'exchange', 'mp2', 'cc', 'explicit_correlation', 'other'), + description=""" + Which terms are approximated by this method: + - 'coulomb': only the J integrals + - 'exchange': only K integrals + - 'mp2': MP2 integrals + - 'cc': Coupled Cluster integrals + - 'explicit_correlation': e.g. R12, F12 integrals + """, + ) + + +class HartreeFock(ModelMethodElectronic): + """ + Defines a Hartree–Fock (HF) calculation using a specified reference determinant + (RHF, UHF, or ROHF). + + In HF theory: + - RHF = Restricted Hartree–Fock, for closed-shell systems. + - UHF = Unrestricted Hartree–Fock, allows different orbitals for alpha/beta spin. + - ROHF = Restricted Open-Shell Hartree–Fock, a partially restricted approach for + open-shell systems. + + **References**: + - Roothaan, C. C. J. (1951). "New Developments in Molecular Orbital Theory." + Rev. Mod. Phys. 23, 69. + - Szabo, A., & Ostlund, N. S. (1989). *Modern Quantum Chemistry*. McGraw-Hill. + - Jensen, F. (2007). *Introduction to Computational Chemistry*. 2nd ed., Wiley. + """ + + reference_determinant = Quantity( + type=MEnum('UHF', 'RHF', 'ROHF'), + description=""" + Type of reference determinant. + """, + ) + + molecular_orbitals_ref = Quantity( + type=MolecularOrbitals, + description=""" + Final, converged molecular orbitals from the Hartree–Fock SCF procedure. This includes + orbital energies, occupations, symmetry labels, and spin channels if applicable. + """, + ) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + """ + Perform minimal consistency checks between the HF reference determinant + and the final molecular orbitals spin array (if available).NumericalIntegration + """ + super().normalize(archive, logger) + + if self.molecular_orbitals is not None: + # If the user is storing a spin array for MOs, check for consistency with the determinant + mo_spin = self.molecular_orbitals.spin + if mo_spin is not None and len(mo_spin) > 0: + unique_spins = np.unique(mo_spin) + if self.reference_determinant == 'RHF': + # For RHF, we typically expect spin=0 (alpha) only + if not np.all(mo_spin == 0): + logger.warning( + 'RHF reference used, but molecular_orbitals.spin contains non-zero spin indices.' + ) + elif self.reference_determinant == 'UHF': + # UHF often has spin=0 for alpha and spin=1 for beta + # If we only see spin=0, that's effectively no spin polarization + if len(unique_spins) == 1 and unique_spins[0] == 0: + logger.info( + 'UHF reference chosen, but only alpha spin found in MOs (spin=0). ' + 'This might still be valid if spin polarization is zero.' + ) + # For ROHF, spin indexing can vary across codes, so no strict check here. + + +class PerturbationMethod(ModelMethodElectronic): + type = Quantity( + type=MEnum('MP', 'RS', 'BW'), + default='MP', + description=""" + Perturbation approach. The abbreviations stand for: + | Abbreviation | Description | + | ------------ | ----------- | + | `'MP'` | Møller-Plesset | + | `'RS'` | Rayleigh-Schrödigner | + | `'BW'` | Brillouin-Wigner | + """, + ) + + order = Quantity( + type=np.int32, + description=""" + Order up to which the perturbation is expanded. + """, + a_eln=ELNAnnotation(component='NumberEditQuantity'), + ) + + density = Quantity( + type=MEnum('relaxed', 'unrelaxed'), + description=""" + unrelaxed density: MP2 expectation value density. + relaxed density : incorporates orbital relaxation. + """, + ) + + spin_component_scaling = Quantity( + type=MEnum('none', 'SCS', 'SOS', 'custom'), + default='none', + description=""" + Spin-component scaling approach for perturbation methods: + - none : no spin-component scaling + - SCS : spin-component scaled (Grimme's approach, https://doi.org/10.1002/wcms.1110) + - SOS : spin-opposite scaled + - custom: user-defined scaling factors + This is typically relevant only for MP2 calculations. + """, + ) + + +class LocalCorrelation(ArchiveSection): + """ + A base section used to define the parameters of a local correlation method for + post-HF calculations, e.g. LMP2, LCC, or domain-based local pair natural orbitals + (PNO, LPNO, DLPNO) in coupled cluster or double-hybrid DFT. + + Typical references: + - Pulay, Chem. Phys. Lett. 100, 151 (1983) (LMP2 concept). + - G. Knizia, G. K.-L. Chan, “Density Matrix Embedding,” J. Chem. Theory Comput. 9, 1428 (2013). + - F. Neese, “The ORCA program system,” WIREs Comput. Mol. Sci. 2, 73-78 (2012) + for DLPNO approaches. + """ + + type = Quantity( + type=MEnum('PNO', 'LPNO', 'DLPNO', 'LMP2', 'other'), + description=""" + The local correlation flavor: + - 'PNO' : Pair Natural Orbitals in generic form + - 'LPNO' : Local PNO approach + - 'DLPNO' : Domain-based Local PNO approach + - 'LMP2' : Local MP2 (Pulay approach) + - 'other' : Another local correlation scheme + """, + ) + + ansatz = Quantity( + type=MEnum('MP2', 'CC', 'DH-DFT'), + description=""" + The underlying ansatz for the local correlation treatment. + LMP2 + LCC + DH-DFT: double-hybrid DFT + ... + """, + ) + + +class CoupledCluster(ModelMethodElectronic): + """ + A base section used to define the parameters of a Coupled Cluster calculation. + A standard schema is defined, though the most common cases can be summarized in the `type` quantity. + """ + + type = Quantity( + type=str, + description=""" + String labeling the Coupled Cluster flavor (e.g., CC2, CC3, CCD, CCSD, CCSDT, etc.). + If a known standard approach, it might match these examples: + - CC2, CC3 : approximate methods for excited states + - CCD : Coupled Cluster Doubles + - CCSD : Singles and Doubles + - CCSDT : Singles, Doubles, and Triples + - CCSDTQ : Singles, Doubles, Triples, and Quadruples + By default, the "perturbative corrections" like (T) are not included in this string. + """, + a_eln=ELNAnnotation(component='StringEditQuantity'), + ) + + reference_determinant = Quantity( + type=MEnum('UHF', 'RHF', 'ROHF', 'UKS', 'RKS', 'ROKS'), + description=""" + The type of reference determinant on which the Coupled Cluster expansion is built. + - UHF / RHF / ROHF : common for wavefunction-based reference + - UKS / RKS / ROKS : if a KS-DFT reference is used (rare but possible). + """, + ) + + excitation_order = Quantity( + type=np.int32, + shape=['*'], + description=""" + The excitation orders explicitly included in the cluster operator, e.g. [1,2] + for CCSD. + - 1 = singles + - 2 = doubles + - 3 = triples + - 4 = quadruples, etc. + Example: CCSDT => [1, 2, 3]. + """, + ) + + perturbative_correction_order = Quantity( + type=np.int32, + shape=['*'], + description=""" + The excitation orders included only in a perturbative manner. + For instance, in CCSD(T), singles and doubles are solved iteratively, + while triples appear as a perturbative correction => [3]. + """, + ) + + perturbative_correction = Quantity( + type=MEnum('(T)', '[T]', '(T0)', '[T0]', '(Q)'), + description=""" + Label for the perturbative corrections: + - '(T)' : standard perturbative triples + - '[T]' : Brueckner-based or other variant + - '(T0)' : approximate version of (T) + - '[T0]' : approximate, typically for Brueckner references + - '(Q)' : perturbative quadruples, e.g., CCSDT(Q) + - 'none' : no perturbative correction + """, + ) + + perturbation_method = SubSection(sub_section=PerturbationMethod.m_def) + + local_correlation = SubSection(sub_section=LocalCorrelation.m_def, repeats=True) + + integral_decomposition = SubSection( + sub_section=IntegralDecomposition.m_def, repeats=True + ) + + explicit_correlation = Quantity( + type=MEnum('F12', 'F12a', 'F12b', 'F12c', 'R12', ''), + default='', + description=""" + Explicit correlation treatment. + These methods introduce the interelectronic distance coordinate + directly into the wavefunction to treat dynamical electron correlation. + It can be added linearly (R12) or exponentially (F12). + """, + ) + + is_frozencore = Quantity( + type=bool, + description=""" + Flag for frozencore approximation. + In post-HF calculation only the valence electrons are typically correlated. + The others are kept frozen. + FC approximations differ between quantum chemistry codes. + """, + ) diff --git a/src/nomad_simulations/schema_packages/model_system.py b/src/nomad_simulations/schema_packages/model_system.py index 66364d5e..33ee56e7 100644 --- a/src/nomad_simulations/schema_packages/model_system.py +++ b/src/nomad_simulations/schema_packages/model_system.py @@ -1113,6 +1113,21 @@ class ModelSystem(System): """, ) + total_charge = Quantity( + type=np.int32, + description=""" + Total charge of the system. + """, + ) + + total_spin = Quantity( + type=np.int32, + description=""" + Total spin state of the system. + Not to be confused with the spin multiplicity 2S + 1. + """, + ) + model_system = SubSection(sub_section=SectionProxy('ModelSystem'), repeats=True) def resolve_system_type_and_dimensionality( diff --git a/src/nomad_simulations/schema_packages/numerical_settings.py b/src/nomad_simulations/schema_packages/numerical_settings.py index 515deea7..b43a2177 100644 --- a/src/nomad_simulations/schema_packages/numerical_settings.py +++ b/src/nomad_simulations/schema_packages/numerical_settings.py @@ -52,66 +52,52 @@ class Smearing(NumericalSettings): class Mesh(ArchiveSection): """ - A base section used to specify the settings of a sampling mesh. - It supports uniformly-spaced meshes and symmetry-reduced representations. + A base section used to define the mesh or space partitioning over which a discrete numerical integration is performed. """ - spacing = Quantity( - type=MEnum('Equidistant', 'Logarithmic', 'Tan'), - shape=['dimensionality'], + dimensionality = Quantity( + type=np.int32, + default=3, description=""" - Identifier for the spacing of the Mesh. Defaults to 'Equidistant' if not defined. It can take the values: - - | Name | Description | - | --------- | -------------------------------- | - | `'Equidistant'` | Equidistant grid (also known as 'Newton-Cotes') | - | `'Logarithmic'` | log distance grid | - | `'Tan'` | Non-uniform tan mesh for grids. More dense at low abs values of the points, while less dense for higher values | + Dimensionality of the mesh: 1, 2, or 3. Defaults to 3. """, ) - quadrature = Quantity( - type=MEnum( - 'Gauss-Legendre', - 'Gauss-Laguerre', - 'Clenshaw-Curtis', - 'Newton-Cotes', - 'Gauss-Hermite', - ), + mesh_type = Quantity( + type=MEnum('equidistant', 'logarithmic', 'tan'), + shape=['dimensionality'], description=""" - Quadrature rule used for integration of the Mesh. This quantity is relevant for 1D meshes: + Kind of mesh identifying the spacing in each of the dimensions specified by `dimensionality`. It can take the values: | Name | Description | | --------- | -------------------------------- | - | `'Gauss-Legendre'` | Quadrature rule for integration using Legendre polynomials | - | `'Gauss-Laguerre'` | Quadrature rule for integration using Laguerre polynomials | - | `'Clenshaw-Curtis'` | Quadrature rule for integration using Chebyshev polynomials using discrete cosine transformations | - | `'Gauss-Hermite'` | Quadrature rule for integration using Hermite polynomials | + | `'equidistant'` | Equidistant grid (also known as 'Newton-Cotes') | + | `'logarithmic'` | log distance grid | + | `'Tan'` | Non-uniform tan mesh for grids. More dense at low abs values of the points, while less dense for higher values | """, - ) # ! @JosePizarro3 I think that this is separate from the spacing + ) - n_points = Quantity( + grid = Quantity( type=np.int32, + shape=['dimensionality'], description=""" - Number of points in the mesh. + Number of points sampled along each axis of the mesh. """, ) - dimensionality = Quantity( + n_points = Quantity( type=np.int32, - default=3, description=""" - Dimensionality of the mesh: 1, 2, or 3. Defaults to 3. + Total number of points in the mesh. """, ) - grid = Quantity( - type=np.int32, + spacing = Quantity( + type=np.float64, shape=['dimensionality'], - description=""" - Amount of mesh point sampling along each axis. See `type` for the axes definition. + description="""Grid spacing for equidistant meshes. Ignored for other kinds. """, - ) # ? @JosePizzaro3: should the mesh also contain its boundary information + ) points = Quantity( type=np.complex128, @@ -126,25 +112,102 @@ class Mesh(ArchiveSection): shape=['n_points'], description=""" The amount of times the same point reappears. A value larger than 1, typically indicates - a symmetry operation that was applied to the `Mesh`. This quantity is equivalent to `weights`: + a symmetry operation that was applied to the `Mesh`. + """, + ) + + pruning = Quantity( + type=MEnum('fixed', 'adaptive'), + description=""" + Pruning method applied for reducing the amount of points in the Mesh. This is typically + used for numerical integration near the core levels in atoms. + In the fixed grid methods, the number of angular grid points is predetermined for + ranges of radial grid points, while in the adaptive methods, the angular grid is adjusted + on-the-fly for each radial point according to some accuracy criterion. + """, + ) + + def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: + super().normalize(archive, logger) - multiplicities = n_points * weights + if self.dimensionality not in [1, 2, 3]: + logger.error( + '`dimensionality` meshes different than 1, 2, or 3 are not supported.' + ) + + +class NumericalIntegration(NumericalSettings): + """ + Numerical integration settings used to resolve the following type of integrals by discrete + numerical integration: + + ```math + \int_{\vec{r}_a}^{\vec{r}_b} d^3 \vec{r} F(\vec{r}) \approx \sum_{n=a}^{b} w(\vec{r}_n) F(\vec{r}_n) + ``` + + Here, $F$ can be any type of function which would define the type of rules that can be applied + to solve such integral (e.g., 1D Gaussian quadrature rule or multi-dimensional `angular` rules like the + Lebedev quadrature rule). + + These multidimensional integral has a `Mesh` defined over which the integration is performed, i.e., the + $\vec{r}_n$ points. + """ + + mesh = SubSection(sub_section=Mesh.m_def) + + coordinate = Quantity( + type=MEnum('full', 'radial', 'angular'), + description=""" + Coordinate over which the integration is performed. `full` means the integration is performed in + entire space. `radial` and `angular` describe cases where the integration is performed for + functions which can be splitted into radial and angular distributions (e.g., orbital wavefunctions). """, ) - weights = Quantity( + integration_rule = Quantity( + type=str, # ? extend to MEnum? + description=""" + Integration rule used. This can be any 1D Gaussian quadrature rule or multi-dimensional `angular` rules, + e.g., Lebedev quadrature rule (see e.g., Becke, Chem. Phys. 88, 2547 (1988)). + """, + ) + + integration_thresh = Quantity( type=np.float64, - shape=['n_points'], description=""" - Weight of each point. A value smaller than 1, typically indicates a symmetry operation that was - applied to the mesh. This quantity is equivalent to `multiplicities`: + An accuracy threshold for the integration grid, controlling how fine the + discretization is. Some programs label it "integral accuracy" or "grid accuracy". + For instance, GRIDTHR in Molpro or BFCut in ORCA. + """, + ) - weights = multiplicities / n_points + weight_approximation = Quantity( + type=str, + description=""" + Approximation applied to the weight when doing the numerical integration. + See e.g., C. W. Murray, N. C. Handy + and G. J. Laming, Mol. Phys. 78, 997 (1993). + """, + ) + + weight_cutoff = Quantity( + type=np.float64, + description=""" + Threshold for discarding small weights during integration. + Grid points very close to the nucleus can have very small grid weights. + WEIGHT_CUT in Molpro. + Wcut in ORCA. """, ) def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + valid_coordinates = ['full', 'radial', 'angular', None] + if self.coordinate not in valid_coordinates: + logger.warning( + f'Invalid coordinate value: {self.coordinate}. Resetting to None.' + ) + self.coordinate = None class KSpaceFunctionalities: @@ -887,3 +950,110 @@ def __init__(self, m_def: 'Section' = None, m_context: 'Context' = None, **kwarg def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None: super().normalize(archive, logger) + + +class OrbitalLocalization(SelfConsistency): + """ + Numerical settings that control orbital localization, typically applied + to transform canonical molecular orbitals into localized orbitals. + These localized orbitals can then be used for: + - Local correlation methods (e.g., LMP2, local CC) + - Interpretable chemical analysis (e.g., identifying bonds, lone pairs) + - Faster post-HF or excited-state calculations + + Inherit from `SelfConsistency` because some localization algorithms are + iterative, requiring thresholds and iteration limits akin to an SCF loop. + + References: + - R. F. Boys, "Construction of Some Molecular Orbitals to Be Approximately Invariant + for Changes in Molecular Conformation," Rev. Mod. Phys. 32, 296 (1960). [Boys] + - J. Pipek, P. G. Mezey, J. Chem. Phys. 90, 4916 (1989). [PM method] + - J. L. Knizia, "Intrinsic Atomic Orbitals: An Unbiased Bridge between Quantum Theory + and Chemical Concepts," J. Chem. Theory Comput. 9, 4834 (2013). [IBO method] + - P. Pulay, Chem. Phys. Lett. 100, 151 (1983). [Local correlation context] + """ + + localization_method = Quantity( + type=MEnum('FB', 'PM', 'IBO', 'IAOIBO', 'IAOBOYS', 'NEWBOYS', 'AHFB'), + description=""" + The chosen localization algorithm: + - FB : Foster-Boys method (a variant of Boys localization) + - PM : Pipek–Mezey localization + - IBO : Intrinsic Bond Orbitals (Knizia) + - IAOIBO : Combination of Intrinsic Atomic Orbitals + Intrinsic Bond Orbitals + - IAOBOYS: IAO-based Boys approach (a custom variant) + - NEWBOYS: Another specialized Boys-based approach + - AHFB : Augmented Hessian Foster-Boys, an alternative with different + optimization steps + """, + ) + + orbital_window = Quantity( + shape=['*'], + type=str, + description=""" + The set of molecular orbitals to be localized (e.g., 'occupied only', 'valence only', + or a range like `[5..20]`). This can be code- or user-defined. For instance, + 'occupied' might indicate all occupied orbitals except possibly core orbitals, + 'valence' might skip deep core and also skip high-lying virtual orbitals, etc. + """, + ) + + core_threshold = Quantity( + type=np.float64, + description=""" + The energy window or threshold defining which orbitals are considered core, + and thus excluded from the localization procedure. For example, an orbital with + an energy below -20.0 eV might be automatically treated as a frozen core orbital and + not localized. + """, + ) + + +class PairApproximationSettings(NumericalSettings): + """ + Numerical settings that control Pair Natural Orbitals (PNOs) in local correlation approaches. + PNOs are a compact representation of the virtual orbital space for each pair (or domain) + of occupied orbitals, improving efficiency in post-HF methods (e.g., local MP2, local CC). + + The nomenclature for these thresholds is adapted from different programs (e.g., Molpro, ORCA). + This class is code-agnostic and allows a single 'approximation_level' to store + code-specific presets such as: + 'tightPNO', 'normalPNO', 'loosePNO' (ORCA) or + 'default', 'tight', 'vtight' (Molpro), etc. + + For more fine-grained custom thresholds, the user can fill the other + domain/pair fields (occupancies, energy thresholds, etc.) on the parser side as needed. + + The nomenclature for these thresholds is adapted from Molpro, ORCA, etc. + See also: + - H.-J. Werner, P. J. Knowles, G. Knizia, F. R. Manby, M. Schütz, + "Molpro: a general-purpose quantum chemistry program package," + WIREs Comput. Mol. Sci. 2, 242 (2012). + - F. Neese, "The ORCA program system," + WIREs Comput. Mol. Sci. 2, 73-78 (2012) for DLPNO expansions. + - G. Knizia, "Intrinsic Bond Orbitals," + J. Chem. Theory Comput. 9, 4834 (2013) for orbital transformations in local correlation. + """ + + # type = Quantity( + # type=MEnum('PAO', 'OSV', 'PNO'), + # description=""" + # PAO : pair atomic orbitals + # OSV : orbital-specific virtuals + # PNO : pair natural orbitals + # """, + # ) + + code_specific_approximation_tier = Quantity( + type=str, + description=""" + A single keyword or label indicating a preset PNO approximation level. Examples: + - In ORCA: 'loosePNO', 'normalPNO', 'tightPNO' + - In Molpro: 'default', 'tight', 'vtight' + - In Psi4: 'loose', 'normal', 'tight' for PNO_CONVERGENCE keyword + - Or any other custom label recognized by your code + + This field is purely descriptive to unify different codes' preset naming. + """, + ) diff --git a/tests/test_basis_set.py b/tests/test_basis_set.py index b1dcb03c..7e6dd427 100644 --- a/tests/test_basis_set.py +++ b/tests/test_basis_set.py @@ -15,6 +15,7 @@ APWOrbital, APWPlaneWaveBasisSet, AtomCenteredBasisSet, + AtomCenteredFunction, BasisSetContainer, MuffinTinRegion, PlaneWaveBasisSet, @@ -418,3 +419,134 @@ def test_quick_step() -> None: ], } # TODO: generate a QuickStep generator in the CP2K plugin + + +@pytest.mark.parametrize( + 'basis_set_name, basis_type, role, expected_name, expected_type, expected_role', + [ + ('cc-pVTZ', 'GTO', 'orbital', 'cc-pVTZ', 'GTO', 'orbital'), + ('def2-TZVP', 'GTO', 'auxiliary_scf', 'def2-TZVP', 'GTO', 'auxiliary_scf'), + ( + 'aug-cc-pVDZ', + 'STO', + 'auxiliary_post_hf', + 'aug-cc-pVDZ', + 'STO', + 'auxiliary_post_hf', + ), + ( + 'custom_basis', + None, + None, + 'custom_basis', + None, + None, + ), # Undefined type and role + ], +) +def test_atom_centered_basis_set_init( + basis_set_name, basis_type, role, expected_name, expected_type, expected_role +): + """Test initialization of AtomCenteredBasisSet.""" + bs = AtomCenteredBasisSet(basis_set=basis_set_name, type=basis_type, role=role) + assert bs.basis_set == expected_name + assert bs.type == expected_type + assert bs.role == expected_role + + +@pytest.mark.parametrize( + 'functions, expected_count', + [ + ( + [ + AtomCenteredFunction( + harmonic_type='spherical', + function_type='s', + n_primitive=3, + exponents=[1.0, 2.0, 3.0], + contraction_coefficients=[0.5, 0.3, 0.2], + ), + ], + 1, + ), + ( + [ + AtomCenteredFunction( + harmonic_type='cartesian', + function_type='p', + n_primitive=1, + exponents=[0.5], + contraction_coefficients=[1.0], + ), + AtomCenteredFunction( + harmonic_type='spherical', + function_type='d', + n_primitive=2, + exponents=[1.0, 2.0], + contraction_coefficients=[0.4, 0.6], + ), + ], + 2, + ), + ], +) +def test_atom_centered_basis_set_functional_composition(functions, expected_count): + """Test functional composition within AtomCenteredBasisSet.""" + bs = AtomCenteredBasisSet(functional_composition=functions) + assert len(bs.functional_composition) == expected_count + for f, ref_f in zip(bs.functional_composition, functions): + assert f.harmonic_type == ref_f.harmonic_type + assert f.function_type == ref_f.function_type + assert f.n_primitive == ref_f.n_primitive + assert np.allclose(f.exponents, ref_f.exponents) + assert np.allclose(f.contraction_coefficients, ref_f.contraction_coefficients) + + +def test_atom_centered_basis_set_normalize(): + """Test normalization of AtomCenteredBasisSet.""" + bs = AtomCenteredBasisSet( + basis_set='cc-pVTZ', + type='GTO', + role='orbital', + functional_composition=[ + AtomCenteredFunction( + harmonic_type='spherical', + function_type='s', + n_primitive=2, + exponents=[1.0, 2.0], + contraction_coefficients=[0.5, 0.5], + ) + ], + ) + bs.normalize(None, None) + # Add assertions for normalized attributes if needed + assert bs.basis_set == 'cc-pVTZ' + assert bs.type == 'GTO' + assert bs.role == 'orbital' + assert len(bs.functional_composition) == 1 + + +def test_atom_centered_basis_set_invalid_data(): + """Test behavior with missing or invalid data.""" + bs = AtomCenteredBasisSet( + basis_set='invalid_basis', + type=None, # Missing type + role=None, # Missing role + ) + assert bs.basis_set == 'invalid_basis' + assert bs.type is None + assert bs.role is None + + # Test functional composition with invalid data + invalid_function = AtomCenteredFunction( + harmonic_type='spherical', + function_type='s', + n_primitive=2, + exponents=[1.0], # Mismatched length + contraction_coefficients=[0.5, 0.5], + ) + bs.functional_composition = [invalid_function] + + # Call normalize to trigger validation + with pytest.raises(ValueError, match='Mismatch in number of exponents'): + invalid_function.normalize(None, None) diff --git a/tests/test_numerical_settings.py b/tests/test_numerical_settings.py index 6426b4cd..0ead803f 100644 --- a/tests/test_numerical_settings.py +++ b/tests/test_numerical_settings.py @@ -9,6 +9,8 @@ KLinePath, KMesh, KSpaceFunctionalities, + Mesh, + NumericalIntegration, ) from . import logger @@ -377,3 +379,82 @@ def test_resolve_points(self, k_line_path: KLinePath): ] ) assert np.allclose(k_line_path.points, points) + + +@pytest.mark.parametrize( + 'dimensionality, expected_warning', + [ + (3, None), # Valid case + (2, None), # Valid case + ( + 5, + '`dimensionality` meshes different than 1, 2, or 3 are not supported.', + ), # Invalid + ( + 0, + '`dimensionality` meshes different than 1, 2, or 3 are not supported.', + ), # Invalid + ], +) +def test_mesh_dimensionality_validation(dimensionality, expected_warning, caplog): + mesh = Mesh(dimensionality=dimensionality) + mesh.normalize(None, logger) + if expected_warning: + assert expected_warning in caplog.text + else: + assert caplog.text == '' + + +@pytest.mark.parametrize( + 'dimensionality, grid, points', + [ + (3, [10, 10, 10], None), # Valid grid, no points defined yet + (3, None, None), # Missing grid and points + ( + 3, + [10, 10, 10], + [[0, 0, 0], [1, 1, 1]], + ), # Valid points (though fewer than grid suggests) + ], +) +def test_mesh_grid_and_points(dimensionality, grid, points): + mesh = Mesh(dimensionality=dimensionality, grid=grid, points=points) + assert mesh.dimensionality == dimensionality + if grid is not None: + assert np.allclose(mesh.grid, grid) + else: + assert mesh.grid == grid + if points is not None: + assert np.allclose(mesh.points, points) + else: + assert mesh.points == points + + +def test_mesh_spacing_normalization(): + mesh = Mesh(dimensionality=3, grid=[10, 10, 10], spacing=[0.1, 0.1, 0.1]) + mesh.normalize(None, logger) + assert np.allclose(mesh.spacing, [0.1, 0.1, 0.1]) + + +def test_numerical_integration_mesh(): + mesh = Mesh(dimensionality=3, grid=[10, 10, 10]) + integration = NumericalIntegration(mesh=mesh) + assert integration.mesh.dimensionality == 3 + assert np.allclose(integration.mesh.grid, [10, 10, 10]) + + +@pytest.mark.parametrize( + 'integration_thresh, weight_cutoff', + [ + (1e-6, 1e-3), # Valid thresholds + (None, 1e-3), # Missing integration threshold + (1e-6, None), # Missing weight cutoff + (None, None), # Both thresholds missing + ], +) +def test_numerical_integration_thresholds(integration_thresh, weight_cutoff): + integration = NumericalIntegration( + integration_thresh=integration_thresh, weight_cutoff=weight_cutoff + ) + assert integration.integration_thresh == integration_thresh + assert integration.weight_cutoff == weight_cutoff