Skip to content

Commit

Permalink
improved class descriptions, new MolecularOrbitals class, improved Ha…
Browse files Browse the repository at this point in the history
…rtreeFock section
  • Loading branch information
EBB2675 committed Jan 28, 2025
1 parent 6f3e30b commit 92fd95e
Show file tree
Hide file tree
Showing 4 changed files with 359 additions and 195 deletions.
103 changes: 69 additions & 34 deletions src/nomad_simulations/schema_packages/atoms_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,70 +643,105 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
self.atomic_number = self.resolve_atomic_number(logger=logger)


class MolecularOrbitalsState(Entity):
class MolecularOrbitals(Entity):
"""
A base section to define molecular orbitals.
This class stores all molecular orbitals (MO) in a single container, with each Quantity using
arrays indexed by mo_num and ao_num.
Comparison to TREXIO:
- mo/type -> mo_type
- mo/num -> mo_num
- mo/coefficient -> coefficient
- mo/coefficient_im -> coefficient_im
- mo/symmetry -> symmetry
- mo/occupation -> occupation
- mo/energy -> energy
- mo/spin -> spin
"""

symmetry_label = Quantity(
mo_type = Quantity(
type=str,
shape=['mo_num'],
description="""
Symmetry label of the molecular orbital (e.g., 'sigma', 'pi', 'delta').
Type of the molecular orbitals
e.g. 'canonical', 'localized'.
In CASSCF calculations, the orbital subspaces can be of different types.
E.g.:
Internal orbitals : canonical
Active orbitals : natural
Virtual orbitals : canonical
""",
)

energy = Quantity(
mo_num = Quantity(
type=np.int32,
description="""
Number of molecular orbitals.
""",
)

ao_num = Quantity(
type=np.int32,
description="""
Number of atomic orbitals or basis functions (often needed for coefficient shape).
Corresponds to the 'ao.num' dimension in TREXIO.
""",
)

coefficient = Quantity(
type=np.float64,
#unit='eV',
shape=['mo_num', 'ao_num'],
description="""
Energy of the molecular orbital.
Real part of the MO coefficients. The shape is
[mo_num, ao_num], meaning each row corresponds to one MO and each column
to one atomic orbital (or basis function).
""",
)

occupation = Quantity(
coefficient_im = Quantity(
type=np.float64,
shape=['mo_num', 'ao_num'],
description="""
Occupation of the molecular orbital. This value is typically an integer (0 or 2)
in closed-shell systems, but can be fractional in open-shell or spin-polarized
calculations.
Imaginary part of the MO coefficients. The shape is
[mo_num, ao_num]. This array may be omitted or set to zero if the orbitals
are purely real.
""",
)

spin = Quantity(
type=MEnum('alpha', 'beta'),
symmetry = Quantity(
type=str,
shape=['mo_num'],
description="""
Spin of the molecular orbital. 'alpha' corresponds to spin-up, 'beta' corresponds
to spin-down.
Symmetry label for each MO, e.g. group-theory labels or
simpler 'sigma', 'pi', 'delta'.
""",
)

coefficients = Quantity(
occupation = Quantity(
type=np.float64,
shape=['number_of_atoms', 'number_of_basis_functions'],
shape=['mo_num'],
description="""
Coefficients of the molecular orbital expressed as a linear combination of atomic orbitals.
The shape corresponds to the number of atoms and their associated basis functions.
Occupation numbers for each MO. Typically 0 or 2
for closed-shell systems, but may be fractional (between 0 and 2) in open-shell systems or multi-reference calculations.
""",
)

atom_contributions = SubSection(
sub_section=AtomsState.m_def,
repeats=True,
energy = Quantity(
type=np.float64,
shape=['mo_num'],
description="""
Contribution of each atom to the molecular orbital, as defined by its basis functions.
Orbital energies for each MO.
""",
)

def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
super().normalize(archive, logger)

# Validation: Ensure occupation values are consistent
if self.occupation is not None and (self.occupation < 0 or self.occupation > 2):
logger.error("The molecular orbital occupation must be between 0 and 2.")
spin = Quantity(
type=np.int32,
shape=['mo_num'],
description="""
Spin channel for each MO if this is an unrestricted open-shell set.
Typically 0 for alpha, 1 for beta.
""",
)

# Validation: Ensure coefficients are provided if atom contributions are defined
if self.atom_contributions and self.coefficients is None:
logger.error(
"Coefficients must be defined when atom contributions are provided."
)

116 changes: 100 additions & 16 deletions src/nomad_simulations/schema_packages/basis_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,24 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:

class AtomCenteredFunction(ArchiveSection):
"""
Specifies a single function (term) in an atom-centered basis set.
Specifies a single contracted basis function in an atom-centered basis set.
In many quantum-chemistry codes, an atom-centered basis set is composed of
several "shells," each shell containing one or more basis functions of a certain
angular momentum. For instance, a shell of p-type orbitals (L=1) typically
consists of 3 degenerate functions (p_x, p_y, p_z) if `harmonic_type='cartesian'`
or 3 spherical harmonics if `harmonic_type='spherical'`.
A single "atom-centered function" can be a linear combination of multiple
primitive Gaussians (or Slater-type orbitals, STOs).
In practice, these contract together to form the final basis function used by
the SCF or post-SCF method. Often, each contraction is labeled by its
angular momentum (e.g., s, p, d, f) and a set of exponents and coefficients.
**References**:
- T. Helgaker, P. Jørgensen, J. Olsen, *Molecular Electronic-Structure Theory*, Wiley (2000).
- F. Jensen, *Introduction to Computational Chemistry*, 2nd ed., Wiley (2007).
- J. B. Foresman, Æ. Frisch, *Exploring Chemistry with Electronic Structure Methods*, Gaussian Inc.
"""

harmonic_type = Quantity(
Expand All @@ -197,7 +214,13 @@ class AtomCenteredFunction(ArchiveSection):
),
default='spherical',
description="""
Specifies whether the basis functions are spherical-harmonic or cartesian functions.
Specifies whether the basis functions are expanded in **spherical** (pure)
harmonics or **cartesian** harmonics. Many modern quantum-chemistry codes
default to *spherical harmonics* for d, f, g..., which eliminates the
redundant functions found in the cartesian sets.
- `'spherical'` : (2l+1) functions for a shell of angular momentum l
- `'cartesian'` : (l+1)(l+2)/2 functions for that shell (extra functions appear)
""",
)

Expand All @@ -218,39 +241,61 @@ class AtomCenteredFunction(ArchiveSection):
'spdf',
),
description="""
L=a+b+c
The angular momentum of GTO to be added.
Symbolic label for the **angular momentum** of this contracted function.
Typical single-letter labels:
- 's' => L=0
- 'p' => L=1
- 'd' => L=2
- 'f' => L=3
- 'g' => L=4
- 'h', 'i', etc. => still higher angular momenta
Combined labels like 'sp' or 'spdf' indicate a **combined shell** in which
multiple angular momenta share exponents. For example, in some older Pople
basis sets, an 'sp' shell has an s- and p-type function sharing the same
exponents but different contraction coefficients.
""",
)

n_primitive = Quantity(
type=np.int32,
description="""
Number of primitives.
Linear combinations of the primitive Gaussians are formed to approximate the radial extent of an STO.
Number of **primitive** functions in this contracted basis function.
For example, in a contracted Gaussian-type orbital (GTO) approach, each basis
function might be built from a sum of `n_primitive` Gaussians with different
exponents, each scaled by a contraction coefficient.
""",
)

exponents = Quantity(
type=np.float32,
shape=['n_primitive'],
description="""
List of exponents for the basis function.
The **exponents** of each primitive basis function.
In a Gaussian basis set (GTO), these are the alpha_i in
exp(-alpha_i * r^2). In a Slater-type basis (STO), they are the zeta_i in
exp(-zeta_i * r). Typically sorted from largest to smallest.
""",
)

contraction_coefficients = Quantity(
type=np.float32,
shape=['*'], # Flexible shape to handle combined types (e.g. SP, SPD..)
description="""
List of contraction coefficients corresponding to the exponents.
The **contraction coefficients** associated with each primitive exponent.
In the simplest case (pure s- or p-function), this array has length
equal to `n_primitive`. For combined shells (like 'sp'), the length
might be `2 * n_primitive`, because you have separate coefficients
for the s-part and the p-part.
""",
)

point_charge = Quantity(
type=np.float32,
description="""
the value of the point charge.
If using a basis function that explicitly includes a point-charge or an
ECP-like pseudo-component, this field can store that net charge.
Typically 0 for standard GTO or STO expansions.
Some extended basis concepts (or embedded charges) might set it differently.
""",
)

Expand All @@ -271,7 +316,7 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:
f'found {len(self.exponents)}.'
)

# Resolve combined types
# For combined shells (like 'sp', 'spd', etc.), ensure the coefficient array is large enough
if self.function_type and len(self.function_type) > 1:
num_types = len(self.function_type) # For SP: 2, SPD: 3, etc.
if self.contraction_coefficients is not None:
Expand Down Expand Up @@ -307,13 +352,36 @@ def normalize(self, archive: 'EntryArchive', logger: 'BoundLogger') -> None:

class AtomCenteredBasisSet(BasisSetComponent):
"""
Defines an atom-centered basis set.
Defines an **atom-centered basis set** for quantum chemistry calculations.
Unlike plane-wave methods, these expansions are typically built around each atom's
position, using either:
- Slater-type orbitals (STO)
- Gaussian-type orbitals (GTO)
- Numerical atomic orbitals (NAO)
- Effective-core potentials or point-charges (PC, cECP, etc.)
This section references multiple `AtomCenteredFunction` objects, each describing
a single contracted function or shell. Additionally, one can label the overall
basis set name (e.g., "cc-pVTZ", "def2-SVP", "6-31G**") and specify the high-level
role of the basis set in the calculation.
**Common examples**:
- **Pople basis** (3-21G, 6-31G(d), 6-311++G(2df,2pd), etc.)
- **Dunning correlation-consistent** (cc-pVDZ, cc-pVTZ, aug-cc-pVTZ, etc.)
- **Slater basis** used in ADF, for instance
- **ECP** (Effective Core Potential) expansions like LANL2DZ or SDD for transition metals
**References**:
- F. Jensen, *Introduction to Computational Chemistry*, 2nd ed., Wiley (2007).
- A. Szabo, N. S. Ostlund, *Modern Quantum Chemistry*, McGraw-Hill (1989).
- T. H. Dunning Jr., J. Chem. Phys. 90, 1007 (1989) for correlation-consistent basis sets.
"""

basis_set = Quantity(
type=str,
description="""
name of the basis set.
**Name** or label of the basis set as recognized by the code or standard
library. Examples: "6-31G*", "cc-pVTZ", "def2-SVP", "STO-3G", "LANL2DZ" (ECP).
""",
)

Expand All @@ -326,7 +394,15 @@ class AtomCenteredBasisSet(BasisSetComponent):
'PC', # Point charges
),
description="""
Type of the basis set, e.g. STO or GTO.
The **functional form** of the basis set:
- 'STO': Slater-type orbitals
- 'GTO': Gaussian-type orbitals
- 'NAO': Numerical atomic orbitals
- 'cECP': Some variant of a "capped" or shape-consistent ECP
- 'PC': Point charges (or ghost basis centers)
If a code uses a mixture (e.g., GTO + ECP), it might either store them
as separate `AtomCenteredBasisSet` sections or unify them if the code does so internally.
""",
)

Expand All @@ -335,16 +411,24 @@ class AtomCenteredBasisSet(BasisSetComponent):
'orbital',
'auxiliary_scf',
'auxiliary_post_hf',
'cabs', # complementary auxiliary basis set
'cabs',
),
description="""
The role of the basis set.
The role of this basis set in the calculation:
- 'orbital': main orbital basis for the SCF
- 'auxiliary_scf': used for RI-J or density fitting in SCF
- 'auxiliary_post_hf': used in MP2, CC, etc.
- 'cabs': complementary auxiliary basis for explicitly correlated (F12) methods.
""",
)

total_number_of_basis_functions = Quantity(
type=np.int32,
description=""""The total number of basis functions.""",
description="""
The **total** number of contracted basis functions in this entire set.
This is typically the sum of the per-shell function counts, `(2l+1)` for spherical
or `(l+1)(l+2)/2` for cartesian shells, over all shells on all relevant atoms (within the scope of this section).
""",
)

functional_composition = SubSection(
Expand Down
Loading

1 comment on commit 92fd95e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Coverage

Coverage Report
FileStmtsMissCoverMissing
src/nomad_simulations
   __init__.py4250%3–4
   _version.py11282%5–6
src/nomad_simulations/schema_packages
   __init__.py15287%39–41
   atoms_state.py2002190%13–15, 201–204, 228, 283–284, 352–353, 355, 537, 549–550, 611–615, 630–634, 641
   basis_set.py2654085%8–9, 122–133, 172–185, 320–347, 616–620, 642–643, 687–690, 809, 840, 842
   general.py89891%4–7, 121, 185, 295–296, 306
   model_method.py3068971%10–12, 175–178, 181–188, 280–281, 301, 322–343, 359–385, 388–405, 591, 784, 795, 837–844, 882, 901, 981, 1038, 1113, 1227, 1311–1328
   model_system.py3503789%45–51, 235, 254, 258, 261, 264, 290, 376–377, 454–455, 472–473, 686–689, 736–743, 917–918, 1155–1159, 1165–1166, 1174–1175, 1180, 1203
   numerical_settings.py2946777%12–14, 204–210, 280, 282–283, 286–289, 293–294, 301–304, 313–316, 320–323, 325–328, 333–336, 342–345, 532–559, 634, 669–672, 696, 699, 744, 746–749, 753, 757, 804, 808–829, 884–885, 952, 1122
   outputs.py1201092%9–10, 252–255, 295–298, 323, 325, 362, 381
   physical_property.py102793%20–22, 202, 331–333
   variables.py861286%8–10, 98, 121, 145, 167, 189, 211, 233, 256, 276
src/nomad_simulations/schema_packages/properties
   band_gap.py51590%8–10, 135–136
   band_structure.py1232580%9–11, 232–265, 278, 285, 321–322, 325, 372–373, 378
   energies.py42979%7–9, 36, 57, 82, 103, 119, 134
   fermi_surface.py17476%7–9, 40
   forces.py22673%7–9, 36, 56, 79
   greens_function.py991387%7–9, 210–211, 214, 235–236, 239, 260–261, 264, 400
   hopping_matrix.py29583%7–9, 58, 94
   permittivity.py48883%7–9, 97–105
   spectral_profile.py26012851%9–11, 57–60, 95–98, 199–300, 356–368, 393–396, 416, 421–424, 466–502, 526, 573–576, 592–593, 598–604
   thermodynamics.py752764%7–9, 35, 56, 72, 81, 90, 101, 110, 137, 147, 157, 172–174, 177, 193, 213–215, 218, 234, 254–256, 259
src/nomad_simulations/schema_packages/utils
   utils.py791680%8–11, 65–74, 83–84, 89, 92, 169–170
TOTAL269854380% 

Tests Skipped Failures Errors Time
423 0 💤 0 ❌ 0 🔥 6.677s ⏱️

Please sign in to comment.