From fed5efb8f034c9c36b8414d6f43093a8d78dfe9b Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 13 May 2025 08:52:00 -0600 Subject: [PATCH 1/5] author list --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ab8bf1e..31a50e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ authors = [ {name = "Maksim Kulichenko", email = "maxim@lanl.gov"}, {name = "Mehmet Cagri Kaymak", email = "mkaymak@lanl.gov"}, {name = "Alejandro Lopez-Bezanilla", email = "alopezb@cadejo.lanl.gov"}, + {name = "Robert Stanton", email = "stanton@lanl.gov"}, ] dependencies = [ "scipy>=1", From 8221ecc3b8686aa69f6df74d1a32bb5b77a94b74 Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 13 May 2025 09:35:03 -0600 Subject: [PATCH 2/5] single point calculation docs/formatting/cleanup --- src/sedacs/driver/graph_adaptive.py | 349 ++++++++++++++++++++-------- 1 file changed, 255 insertions(+), 94 deletions(-) diff --git a/src/sedacs/driver/graph_adaptive.py b/src/sedacs/driver/graph_adaptive.py index 5d5b81a..4abf680 100644 --- a/src/sedacs/driver/graph_adaptive.py +++ b/src/sedacs/driver/graph_adaptive.py @@ -36,6 +36,8 @@ from seqm.seqm_functions.pack import pack import gc import numpy as np +from sedacs.engine import Engine + try: from mpi4py import MPI is_mpi_available = True @@ -43,127 +45,283 @@ is_mpi_available = False __all__ = ["get_occ_singlePoint", "get_adaptiveSCFDM"] -# @brief Construct a connectivity graph based on constructing density matrices of parts of the system. -def get_singlePoint(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, rank, gpu_comm, parts, partsCoreHalo, sy, hindex, mu0, - molecule_whole, P_contr, graph_for_pairs, graph_maskd): - ''' - Function calculates CH hamiltonians (ham), then eVals and dVals on each rank of gpu_comm. - Then it gathers everything on global rank 0, computes chemical potential mu0. +def get_singlePoint(sdc, + eng, + partsPerGPU, + partsPerNode, + node_id, + node_rank, + rank, + gpu_comm, + parts, + partsCoreHalo, + sy, + hindex, + mu0, + molecule_whole, + P_contr, + graph_for_pairs, + graph_maskd): + """ + Function calculates CH hamiltonians (ham), then eVals and dVals on each + rank of gpu_comm. Then it gathers everything on global rank 0, computes + chemical potential mu0. + + Note that in this context: + eVals -> Eigenvalues + Q -> Eigenvectors + dVals -> Norm over the *CORE PART* of the Eigenvectors, Q. + + Parameters + ---------- + sdc: - eng: - partsPerGPU: number of CH processed by one rank. - partsPerNode: number of CH processed on one node. + The SEDACS driver. + eng: Engine + The SEDACS engine. + + partsPerGPU: + number of CH processed by one rank. + partsPerNode: + number of CH processed on one node. node_id: - node_rank: local rank on a node. E.g., for [0,1,2,3] [4,5,6,7], global rank 4 is local rank 0. - rank: global rank - gpu_comm: global communicator for ranks with GPU. - If on CPU, all ranks are involved. gpu_comm is identical to master comm. - If on GPU and num_gpus (per node) == node_numranks, gpu_comm is identical to master comm. - If on GPU and num_gpus (per node) <= node_numranks, gpu_comm is different form master comm. For example, 8 ranks on two nodes [0,1,2,3] and [4,5,6,7] with 2 GPUs per node. In that case, only ranks [0,1] and [4,5] are involved. They, however, become [0,1] [2,3] within gpu_comm. - parts: list of cores - partsCoreHalo: list of core+halo + Current node identifier. + node_rank: + Local rank on a node. 
E.g., for [0,1,2,3] [4,5,6,7], + global rank 4 is local rank 0. + rank: + Global rank. + gpu_comm: + Global communicator for ranks with GPU. + If on CPU, all ranks are involved. gpu_comm is identical to master comm + If on GPU and num_gpus (per node) == node_numranks, gpu_comm is + identical to master comm. + If on GPU and num_gpus (per node) <= node_numranks, gpu_comm is + different form master comm. For example, 8 ranks on two nodes [0,1,2,3] + and [4,5,6,7] with 2 GPUs per node. In that case, only ranks [0,1] and + [4,5] are involved. They, however, become [0,1] [2,3] within gpu_comm. + parts: + List of core indices. + partsCoreHalo: + List of core+halo indices. sy: + System object. hindex: - mu0: chemical potential - molecule_whole: pyseqm molecule object - P_contr: contracted dm. (sy.nats, sdc.maxDeg, 4,4) - graph_for_pairs: graph of communities. E.g. graph_for_pairs[i] is a whole CH community in which atom i is a core atom, including itself. graph_for_pairs[i][0] is a community size - graph_maskd: diagonal mask for P_contr - ''' - ### Instead of this: - # partIndex1 = rank * partsPerRank - # partIndex2 = (rank + 1) * partsPerRank - ### we do this because there might be fewer GPUs per node than ranks per node. + Atom->orbtial index mapping. + mu0: + The chemical potential. + molecule_whole: + PYSEQM Molecule object. + P_contr: + Contracted density matrix. (sy.nats, sdc.maxDeg, 4, 4) + graph_for_pairs: + Graph of communities. E.g. graph_for_pairs[i] is a whole CH community + in which atom i is a core atom, including itself. graph_for_pairs[i][0] + is a community size. + graph_maskd: + Diagonal mask for the contracted density matrix, P_contr. + """ + # Handles the scenario where N_GPU < N_ranks_per_node partIndex1 = (node_rank) * partsPerGPU + node_id*partsPerNode partIndex2 = (node_rank + 1) * partsPerGPU + node_id*partsPerNode - if sdc.UHF: # open shell + # All arrays in thsi code block are flattened 1D numpy arrays. + if sdc.UHF: # Open shell calculation. dValOnRank = np.empty((2, 0)) - eValOnRank = np.empty((2, 0)) # this will store flattened eVals for all CH. 1d np array. - else: # closed shell - dValOnRank = np.array([]) # this will store flattened dVals for all CH. 1d np array. For mu0 - eValOnRank = np.array([]) # this will store flattened eVals for all CH. 1d np array. - eValOnRank_list = [] # this will store eVals arranged per CH. List of 1D torch tensors. For mu0 and later for dm - dValOnRank_list = [] # this will store dVals arranged per CH. List of 1D torch tensors. For mu0 and later for dm - Q_list = [] # Eigenvectors for each part - Nocc_list = [] # Number of occupied orbitals for each part. It's not used in thermal HF but lets keep this option. - core_indices_in_sub_expanded_list = [] # Indices of core hamiltonian in core+halo hamiltonian. Might be useful when core and halo atoms are shuffled to stay sorted, like in PySEQM. - NH_Nh_Hs_list = [] # list of [number_of_heavy_atoms, number_of_hydrogens, dim_of_coreHalo_ham, nocc] - EELEC = 0.0 # this will sum electronic energy of CHs on the current rank + eValOnRank = np.empty((2, 0)) + else: # Closed shell calculation. + dValOnRank = np.array([]) + eValOnRank = np.array([]) + + # eVals and dVals arranged for each Core-Halo. These are given as a List of + # torch Tensors. These are needed for mu0, and later for the density matrix + eValOnRank_list = [] + dValOnRank_list = [] + + # Eigenvectors for each partition + Q_list = [] + + # Number of occupied orbitals for each partition. + # NOTE: This is not used for thermal HF. 
+ Nocc_list = [] + + # Indices of core hamiltonian in core+halo hamiltonian. Might be useful + # when core and halo atoms are shuffled to stay sorted, like in PySEQM. + core_indices_in_sub_expanded_list = [] + + # List -> [number_heavy_atoms, number_hydrogens, dim_H_C+H, N_occupied. + NH_Nh_Hs_list = [] + + # Result for the summed eletronic energies of C+Hs on the current rank. + EELEC = 0.0 + + # Loop over the partitions allocated to the current rank. for partIndex in range(partIndex1, partIndex2): + ticHam = time.perf_counter() - #print("\n Rank, part", rank, partIndex) + + # Extract the subsystem information. (CORE + HALO). subSy = System(len(partsCoreHalo[partIndex])) subSy.symbols = sy.symbols - subSy.coords, subSy.types = extract_subsystem(sy.coords, sy.types, sy.symbols, partsCoreHalo[partIndex]) + subSy.coords, subSy.types = extract_subsystem(sy.coords, + sy.types, + sy.symbols, + partsCoreHalo[partIndex]) + # Extract the subsystem information. (CORE ONLY). subSyCore = System(len(parts[partIndex])) subSyCore.symbols = sy.symbols - subSyCore.coords,subSyCore.types = extract_subsystem(sy.coords,sy.types,sy.symbols,parts[partIndex]) + subSyCore.coords, subSyCore.types = extract_subsystem(sy.coords, + sy.types, + sy.symbols, + parts[partIndex]) if sdc.writeGeom: - partFileName = "subSy" + str(rank) + "_" + str(partIndex) + ".pdb" - write_pdb_coordinates(partFileName, subSy.coords, subSy.types, subSy.symbols) - write_xyz_coordinates("subSy" + str(rank) + "_" + str(partIndex) + ".xyz", subSy.coords, subSy.types, subSy.symbols) - partCoreFileName = "CoreSubSy"+str(rank)+"_"+str(partIndex)+".pdb" - write_pdb_coordinates(partCoreFileName,subSyCore.coords,subSyCore.types,subSyCore.symbols) - write_xyz_coordinates("CoreSubSy"+str(rank)+"_"+str(partIndex)+".xyz",subSyCore.coords,subSyCore.types,subSyCore.symbols) - + # Write subsystems to PDB and XYZ (Core+Halo) + partFileName = "subSy"+str(rank)+"_"+str(partIndex)+".pdb" + write_pdb_coordinates(partFileName, + subSy.coords, + subSy.types, + subSy.symbols) + write_xyz_coordinates("subSy"+str(rank)+"_"+str(partIndex)+".xyz", + subSy.coords, + subSy.types, + subSy.symbols) + + # Write cores to PDB and XYZ + partCoreFileName = "CoreSubSy"+str(rank)+"_"+str(partIndex)+".pdb" + write_pdb_coordinates(partCoreFileName, + subSyCore.coords, + subSyCore.types, + subSyCore.symbols) + write_xyz_coordinates("CoreSubSy"+str(rank)+"_"+str(partIndex)+".xyz", + subSyCore.coords, + subSyCore.types, + subSyCore.symbols) + + # Get the core indices, expandide core indices, and orbital indicies + # for the current subsystem. core_indices_in_sub, core_indices_in_sub_expanded, hindex_sub = \ - get_coreHalo_ham_inds(parts[partIndex], partsCoreHalo[partIndex], sdc, sy, subSy, device=P_contr.device) - - # Get CH hamiltonian. If on GPU, temporarily redefine molecule_whole on GPU (its faster than transfering) and send P_contr to GPU. - # The idea is to have P_contr in shared memory on each node, then update in parallel on node 0. - # If initialized on GPU, then we'll need to transfer it back into shared memory CPU to update in parallel. Ideally, this needs to be fixed. + get_coreHalo_ham_inds(parts[partIndex], + partsCoreHalo[partIndex], + sdc, + sy, + subSy, + device=P_contr.device) + + # Get CH hamiltonian. If on GPU, temporarily redefine molecule_whole + # on GPU (its faster than transfering) and send P_contr to GPU. The + # idea is to have P_contr in shared memory on each node, then update in + # parallel on node 0. 
If initialized on GPU, then well need to transfer + # it back into shared memory CPU to update in parallel. Ideally, + # this needs to be fixed. ham_timing = {} if sdc.scfDevice == 'cuda': device = 'cuda:{}'.format(node_rank) - tmp_molecule_whole = get_molecule_pyseqm(sdc, sy.coords, sy.symbols, sy.types, do_large_tensors = sdc.use_pyseqm_lt, device=device)[0] - ham, eElec = get_hamiltonian(sdc, eng, subSy.coords, subSy.types, subSy.symbols, - parts[partIndex], partsCoreHalo[partIndex], tmp_molecule_whole, P_contr.to(device), graph_for_pairs, graph_maskd, core_indices_in_sub_expanded, ham_timing, verbose=False) + + # Get the temporary PYSEQM Molecule object for this subsystem. + # Simply redefine on the proper device rather than transferring + # molecule_whole from CPU. + tmp_molecule_whole = get_molecule_pyseqm(sdc, + sy.coords, + sy.symbols, + sy.types, + do_large_tensors=sdc.use_pyseqm_lt, + device=device)[0] + + # Get the Hamiltonian using the PYSEQM Molecule object for this + # Core+Halo subsystem. + ham, eElec = get_hamiltonian(sdc, + eng, + subSy.coords, + subSy.types, + subSy.symbols, + parts[partIndex], + partsCoreHalo[partIndex], + tmp_molecule_whole, + P_contr.to(device), + graph_for_pairs, + graph_maskd, + core_indices_in_sub_expanded, + ham_timing, + verbose=False) del tmp_molecule_whole + else: device = 'cpu' - ham, eElec = get_hamiltonian(sdc, eng,subSy.coords,subSy.types,subSy.symbols, - parts[partIndex], partsCoreHalo[partIndex], molecule_whole, P_contr, graph_for_pairs, graph_maskd, core_indices_in_sub_expanded, ham_timing, verbose=False) - EELEC += eElec + # Get the Hamiltonian using the PYSEQM Molecule object for this + # Core+Halo subsystem. + ham, eElec = get_hamiltonian(sdc, + eng, + subSy.coords, + subSy.types, + subSy.symbols, + parts[partIndex], + partsCoreHalo[partIndex], + molecule_whole, + P_contr, + graph_for_pairs, + graph_maskd, + core_indices_in_sub_expanded, + ham_timing, + verbose=False) - #print("TOT {:>8.3f} (s)".format(time.perf_counter() - tic)) + EELEC += eElec # Get eVals, dVals tic = time.perf_counter() norbs = subSy.nats occ = int(float(norbs) / 2.0) # Get the total occupied orbitals. Not used. coreSize = len(parts[partIndex]) - eVals, dVals, Q, NH_Nh_Hs = get_eVals(eng, sdc, sy, ham, subSy.coords, subSy.symbols, subSy.types, sdc.Tel, mu0, - core_indices_in_sub, core_indices_in_sub_expanded, hindex_sub, - coreSize, subSy, subSyCore, parts[partIndex], partsCoreHalo[partIndex], verbose=False) + eVals, dVals, Q, NH_Nh_Hs = get_eVals(eng, + sdc, + sy, + ham, + subSy.coords, + subSy.symbols, + subSy.types, + sdc.Tel, + mu0, + core_indices_in_sub, + core_indices_in_sub_expanded, + hindex_sub, + coreSize, + subSy, + subSyCore, + parts[partIndex], + partsCoreHalo[partIndex], + verbose=False) del ham - + + # Append eVals/eVals depending on if open/closed shell. if sdc.UHF: dValOnRank = np.append(dValOnRank, dVals, axis=1) eValOnRank = np.append(eValOnRank, eVals.cpu().numpy(), axis=1) else: dValOnRank = np.append(dValOnRank, dVals) eValOnRank = np.append(eValOnRank, eVals.cpu().numpy()) + + # Append eVals/dVals/eigenvectors local to this rank. eValOnRank_list.append(eVals.cpu()) dValOnRank_list.append(dVals) + + # See above documentation for the explanation of all of these lists. + # These effective collect the current rank's ifnromation for the above- + # mentioned quantities. 
Q_list.append(Q.cpu()) core_indices_in_sub_expanded_list.append(core_indices_in_sub_expanded) NH_Nh_Hs_list.append(NH_Nh_Hs) Nocc_list.append(occ) - #print("| t eVals/dVals {:>9.4f} (s)".format(time.perf_counter() - tic)) + + # Timing on getting the Hamiltonians. ham_timing['eVals/dVals'] = time.perf_counter() - tic ham_timing['TOT'] = time.perf_counter() - ticHam - #ham_timing_formatted = {key: f"{value:8.3f}" for key, value in ham_timing.items()} formatted_string = " | ".join(f"{key} {value:8.3f}" for key, value in ham_timing.items()) print('Rank', rank, 'part', partIndex, ':', formatted_string) - - ########## - ########## + # Time the MPI collection of eigenvectors. tic = time.perf_counter() if rank != 0: gpu_comm.send(Q_list, dest=0, tag=0) @@ -171,12 +329,8 @@ def get_singlePoint(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, ra Q_LIST = [Q_list] for i in range(1, gpu_comm.Get_size()): Q_LIST.append(gpu_comm.recv(source=i, tag=0)) - print("Time Q_LIST send/recv {:>9.4f} (s)".format(time.perf_counter() - tic), rank) - - # tic = time.perf_counter() - # torch.save(Q_list, 'Q/Q_list_{}.pt'.format(rank)) #### oldR - # print("Time to save Q_list {:>9.4f} (s)".format(time.perf_counter() - tic), rank) - + print("Time Q_LIST send/recv {:>9.4f} (s)" + .format(time.perf_counter() - tic), rank) tic = time.perf_counter() full_dVals = None @@ -187,15 +341,24 @@ def get_singlePoint(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, ra if rank == 0: eValOnRank_SIZES = np.empty(gpu_comm.Get_size(), dtype=int) + # Gather eigenvalues. gpu_comm.Gather(eValOnRank_size, eValOnRank_SIZES, root=0) + + # Set up full containers on Rank 0. if rank == 0: + if sdc.UHF: - full_dVals = np.empty((2, np.sum(eValOnRank_SIZES)), dtype=eValOnRank.dtype) + full_dVals = np.empty((2, np.sum(eValOnRank_SIZES)), + dtype=eValOnRank.dtype) recvcounts = [2 * size for size in eValOnRank_SIZES] + else: - full_dVals = np.empty(np.sum(eValOnRank_SIZES), dtype=eValOnRank.dtype) - full_eVals = np.empty(np.sum(eValOnRank_SIZES), dtype=eValOnRank.dtype) #### oldR + full_dVals = np.empty(np.sum(eValOnRank_SIZES), + dtype=eValOnRank.dtype) + full_eVals = np.empty(np.sum(eValOnRank_SIZES), + dtype=eValOnRank.dtype) + # Gather the actual values. if sdc.UHF: dValOnRank_flat = dValOnRank.flatten() gpu_comm.Gatherv( @@ -204,22 +367,19 @@ def get_singlePoint(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, ra root=0) else: gpu_comm.Gatherv(dValOnRank, [full_dVals, eValOnRank_SIZES], root=0) - gpu_comm.Gatherv(eValOnRank, [full_eVals, eValOnRank_SIZES], root=0) #### oldR + gpu_comm.Gatherv(eValOnRank, [full_eVals, eValOnRank_SIZES], root=0) + # These are the FULL versions of the corresponding lists above. See the + # above documentation for which all of these lists are explained. eVal_LIST = gpu_comm.gather(eValOnRank_list, root=0) dVal_LIST = gpu_comm.gather(dValOnRank_list, root=0) NH_Nh_Hs_LIST = gpu_comm.gather(NH_Nh_Hs_list, root=0) core_indices_in_sub_expanded_LIST = gpu_comm.gather(core_indices_in_sub_expanded_list, root=0) Nocc_LIST = gpu_comm.gather(Nocc_list, root=0) - ####### - ####### + # Flatten the nested list of lists into a single list of tensors. + # There is one tensor per CH partition. if rank == 0: - # Q_LIST = [] - # for i in range(gpu_comm.Get_size()): - # Q_LIST.append(torch.load('Q/Q_list_{}.pt'.format(i))) - - # Flatten the nested list of lists into a single list of tensors. One tensor per CH. 
eVal_LIST = list(itertools.chain(*eVal_LIST)) dVal_LIST = list(itertools.chain(*dVal_LIST)) Q_LIST = list(itertools.chain(*Q_LIST)) @@ -228,18 +388,19 @@ def get_singlePoint(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, ra Nocc_LIST = list(itertools.chain(*Nocc_LIST)) else: Q_LIST = None - if node_rank == 0: print("| t commLists {:>9.4f} (s)".format(time.perf_counter() - tic), rank) + + if node_rank == 0: + print("| t commLists {:>9.4f} (s)".format(time.perf_counter() - tic), + rank) if rank == 0: tic = time.perf_counter() - ####### - ####### - mu0 = get_mu(mu0, full_dVals, full_eVals, sdc.Tel, sy.numel/2) #### oldR - - #mu0 = get_mu(mu0, np.concatenate(dVal_LIST, axis = -1), torch.cat(eVal_LIST, dim = -1), sdc.Tel, sy.nocc) # chemical potential calculation + mu0 = get_mu(mu0, full_dVals, full_eVals, sdc.Tel, sy.numel/2) # oldR print("Time mu0 {:>9.4f} (s)".format(time.perf_counter() - tic)) - return EELEC, eVal_LIST, Q_LIST, NH_Nh_Hs_LIST, core_indices_in_sub_expanded_LIST, Nocc_LIST, mu0 + return (EELEC, eVal_LIST, Q_LIST, + NH_Nh_Hs_LIST, core_indices_in_sub_expanded_LIST, + Nocc_LIST, mu0) ## Single point calculation From b75c10bcbdfd5e6a994840d8fff909f63c9f5e1d Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 13 May 2025 10:35:27 -0600 Subject: [PATCH 3/5] Docs for all but adaptiveDM done --- src/sedacs/driver/graph_adaptive.py | 661 +++++++++++++++++++++------- 1 file changed, 507 insertions(+), 154 deletions(-) diff --git a/src/sedacs/driver/graph_adaptive.py b/src/sedacs/driver/graph_adaptive.py index 4abf680..c9e84cf 100644 --- a/src/sedacs/driver/graph_adaptive.py +++ b/src/sedacs/driver/graph_adaptive.py @@ -74,7 +74,6 @@ def get_singlePoint(sdc, Parameters ---------- - sdc: The SEDACS driver. eng: Engine @@ -120,6 +119,24 @@ def get_singlePoint(sdc, is a community size. graph_maskd: Diagonal mask for the contracted density matrix, P_contr. + + Returns + ------- + EELEC: + The total electronic energy. + eVal_LIST: + List of the eigenvalues by partition. + Q_LIST: + List of the eigenvectors by partition. + NH_Nh_Hs_LIST: + List of NHeavy, NHydrogen, Hydrogen indices by partition. + core_indices_in_sub_expanded_LIST: + List of core indices within each partition, using the expanded indexing + convention required by PYSEQM. + Nocc_LIST: + List of occupation numbyers by partition. + mu0: + Global chemical potential. """ # Handles the scenario where N_GPU < N_ranks_per_node partIndex1 = (node_rank) * partsPerGPU + node_id*partsPerNode @@ -403,77 +420,183 @@ def get_singlePoint(sdc, Nocc_LIST, mu0) -## Single point calculation -# @brief Construct a connectivity graph based on constructing density matrices of parts of the system. -def get_singlePoint_charges(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, rank, gpu_comm, parts, partsCoreHalo, sy, hindex,gscf, mu0, - molecule_whole, P_contr, graph_for_pairs, graph_maskd): - ''' - Function calculates CH hamiltonians (ham), then eVals and dVals on each rank of gpu_comm. - Then it gathers everything on global rank 0, computes chemical potential mu0. +def get_singlePoint_charges(sdc, + eng, + partsPerGPU, + partsPerNode, + node_id, + node_rank, + rank, + gpu_comm, + parts, + partsCoreHalo, + sy, + hindex, + gscf, + mu0, + molecule_whole, + P_contr, + graph_for_pairs, + graph_maskd): + """ + Function calculates CH hamiltonians (ham), then eVals and dVals on each + rank of gpu_comm. Then it gathers everything on global rank 0, computes + chemical potential mu0. 
+ + Note that in this context: + eVals -> Eigenvalues + Q -> Eigenvectors + dVals -> Norm over the *CORE PART* of the Eigenvectors, Q. + + Parameters + ---------- sdc: - eng: - partsPerGPU: number of CH processed by one rank. - partsPerNode: number of CH processed on one node. + The SEDACS driver. + eng: Engine + The SEDACS engine. + + partsPerGPU: + number of CH processed by one rank. + partsPerNode: + number of CH processed on one node. node_id: - node_rank: local rank on a node. E.g., for [0,1,2,3] [4,5,6,7], global rank 4 is local rank 0. - rank: global rank - gpu_comm: global communicator for ranks with GPU. - If on CPU, all ranks are involved. gpu_comm is identical to master comm. - If on GPU and num_gpus (per node) == node_numranks, gpu_comm is identical to master comm. - If on GPU and num_gpus (per node) <= node_numranks, gpu_comm is different form master comm. For example, 8 ranks on two nodes [0,1,2,3] and [4,5,6,7] with 2 GPUs per node. In that case, only ranks [0,1] and [4,5] are involved. They, however, become [0,1] [2,3] within gpu_comm. - parts: list of cores - partsCoreHalo: list of core+halo + Current node identifier. + node_rank: + Local rank on a node. E.g., for [0,1,2,3] [4,5,6,7], + global rank 4 is local rank 0. + rank: + Global rank. + gpu_comm: + Global communicator for ranks with GPU. + If on CPU, all ranks are involved. gpu_comm is identical to master comm + If on GPU and num_gpus (per node) == node_numranks, gpu_comm is + identical to master comm. + If on GPU and num_gpus (per node) <= node_numranks, gpu_comm is + different form master comm. For example, 8 ranks on two nodes [0,1,2,3] + and [4,5,6,7] with 2 GPUs per node. In that case, only ranks [0,1] and + [4,5] are involved. They, however, become [0,1] [2,3] within gpu_comm. + parts: + List of core indices. + partsCoreHalo: + List of core+halo indices. sy: + System object. hindex: - mu0: chemical potential - molecule_whole: pyseqm molecule object - P_contr: contracted dm. (sy.nats, sdc.maxDeg, 4,4) - graph_for_pairs: graph of communities. E.g. graph_for_pairs[i] is a whole CH community in which atom i is a core atom, including itself. graph_for_pairs[i][0] is a community size - graph_maskd: diagonal mask for P_contr - ''' - ### Instead of this: - # partIndex1 = rank * partsPerRank - # partIndex2 = (rank + 1) * partsPerRank - ### we do this because there might be fewer GPUs per node than ranks per node. + Atom->orbtial index mapping. + mu0: + The chemical potential. + molecule_whole: + PYSEQM Molecule object. + P_contr: + Contracted density matrix. (sy.nats, sdc.maxDeg, 4, 4) + graph_for_pairs: + Graph of communities. E.g. graph_for_pairs[i] is a whole CH community + in which atom i is a core atom, including itself. graph_for_pairs[i][0] + is a community size. + graph_maskd: + Diagonal mask for the contracted density matrix, P_contr. + + Returns + ------- + EELEC: + The total electronic energy. + eVal_LIST: + List of the eigenvalues by partition. + Q_LIST: + List of the eigenvectors by partition. + NH_Nh_Hs_LIST: + List of NHeavy, NHydrogen, Hydrogen indices by partition. + core_indices_in_sub_expanded_LIST: + List of core indices within each partition, using the expanded indexing + convention required by PYSEQM. + Nocc_LIST: + List of occupation numbyers by partition. + mu0: + Global chemical potential. 
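    Notes
    -----
    For illustration only (hypothetical values, assuming four atomic
    orbitals per atom as in the example used elsewhere in this file): if a
    core+halo partition contains atoms [0, 1, 2, 3] and its core atoms are
    [1, 3], the entries of core_indices_in_sub_expanded_LIST for that
    partition could be built as

        core_pos = [1, 3]  # positions of the core atoms within the partition
        expanded = [4 * a + i for a in core_pos for i in range(4)]
        # expanded == [4, 5, 6, 7, 12, 13, 14, 15]

    i.e. the atomic-orbital columns of the core atoms inside the core+halo
    Hamiltonian.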
+ """ + # Handles the scenario where N_GPU < N_ranks_per_node partIndex1 = (node_rank) * partsPerGPU + node_id*partsPerNode partIndex2 = (node_rank + 1) * partsPerGPU + node_id*partsPerNode - if sdc.UHF: # open shell + # All arrays in thsi code block are flattened 1D numpy arrays. + if sdc.UHF: # Open shell. dValOnRank = np.empty((2, 0)) - eValOnRank = np.empty((2, 0)) # this will store flattened eVals for all CH. 1d np array. - else: # closed shell - dValOnRank = np.array([]) # this will store flattened dVals for all CH. 1d np array. For mu0 - eValOnRank = np.array([]) # this will store flattened eVals for all CH. 1d np array. - eValOnRank_list = [] # this will store eVals arranged per CH. List of 1D torch tensors. For mu0 and later for dm - dValOnRank_list = [] # this will store dVals arranged per CH. List of 1D torch tensors. For mu0 and later for dm - Q_list = [] # Eigenvectors for each part - Nocc_list = [] # Number of occupied orbitals for each part. It's not used in thermal HF but lets keep this option. - core_indices_in_sub_expanded_list = [] # Indices of core hamiltonian in core+halo hamiltonian. Might be useful when core and halo atoms are shuffled to stay sorted, like in PySEQM. - NH_Nh_Hs_list = [] # list of [number_of_heavy_atoms, number_of_hydrogens, dim_of_coreHalo_ham, nocc] - EELEC = 0.0 # this will sum electronic energy of CHs on the current rank + eValOnRank = np.empty((2, 0)) + else: # Closed shell. + dValOnRank = np.array([]) + eValOnRank = np.array([]) + eValOnRank_list = [] + dValOnRank_list = [] + + # Eigenvectors for each partition + Q_list = [] + + # Number of occupied orbitals for each partition. + # NOTE: This is not used for thermal HF. + Nocc_list = [] + + # Indices of core hamiltonian in core+halo hamiltonian. Might be useful + # when core and halo atoms are shuffled to stay sorted, like in PySEQM. + core_indices_in_sub_expanded_list = [] + + # List -> [number_heavy_atoms, number_hydrogens, dim_H_C+H, N_occupied. + NH_Nh_Hs_list = [] + + # Result for the summed eletronic energies of C+Hs on the current rank. + EELEC = 0.0 + chargesOnRank = None subSysOnRank = [] + # Loop over the partitions allocated to the current rank. for partIndex in range(partIndex1, partIndex2): + ticHam = time.perf_counter() - #print("\n Rank, part", rank, partIndex) + + # Extract the subsystem information. (CORE + HALO). subSy = System(len(partsCoreHalo[partIndex])) subSy.symbols = sy.symbols - subSy.coords, subSy.types = extract_subsystem(sy.coords, sy.types, sy.symbols, partsCoreHalo[partIndex]) - if(gscf == 0): + subSy.coords, subSy.types = extract_subsystem(sy.coords, + sy.types, + sy.symbols, + partsCoreHalo[partIndex]) + + if (gscf == 0): subSy.charges = np.zeros(len(subSy.types)) + # Extract the subsystem information. (CORE ONLY). subSyCore = System(len(parts[partIndex])) subSyCore.symbols = sy.symbols - subSyCore.coords,subSyCore.types = extract_subsystem(sy.coords,sy.types,sy.symbols,parts[partIndex]) + subSyCore.coords, subSyCore.types = extract_subsystem(sy.coords, + sy.types, + sy.symbols, + parts[partIndex]) if sdc.writeGeom: + + # Write subsytem to PDB and XYZ. (CORE + HALO). 
partFileName = "subSy" + str(rank) + "_" + str(partIndex) + ".pdb" - write_pdb_coordinates(partFileName, subSy.coords, subSy.types, subSy.symbols) - write_xyz_coordinates("subSy" + str(rank) + "_" + str(partIndex) + ".xyz", subSy.coords, subSy.types, subSy.symbols) + write_pdb_coordinates(partFileName, + subSy.coords, + subSy.types, + subSy.symbols) + write_xyz_coordinates("subSy" + str(rank) + "_" + str(partIndex) + ".xyz", + subSy.coords, + subSy.types, + subSy.symbols) + partCoreFileName = "CoreSubSy"+str(rank)+"_"+str(partIndex)+".pdb" - write_pdb_coordinates(partCoreFileName,subSyCore.coords,subSyCore.types,subSyCore.symbols) - write_xyz_coordinates("CoreSubSy"+str(rank)+"_"+str(partIndex)+".xyz",subSyCore.coords,subSyCore.types,subSyCore.symbols) + + # Write subsytem to PDB and XYZ. (CORE ONLY). + write_pdb_coordinates(partCoreFileName, + subSyCore.coords, + subSyCore.types, + subSyCore.symbols) + + write_xyz_coordinates("CoreSubSy"+str(rank)+"_"+str(partIndex)+".xyz", + subSyCore.coords, + subSyCore.types, + subSyCore.symbols) core_indices_in_sub, core_indices_in_sub_expanded, hindex_sub = \ @@ -605,191 +728,421 @@ def get_singlePoint_charges(sdc, eng, partsPerGPU, partsPerNode, node_id, node_ #mu0 = get_mu(mu0, np.concatenate(dVal_LIST, axis = -1), torch.cat(eVal_LIST, dim = -1), sdc.Tel, sy.nocc) # chemical potential calculation print("Time mu0 {:>9.4f} (s)".format(time.perf_counter() - tic)) - return EELEC, eVal_LIST, Q_LIST, NH_Nh_Hs_LIST, core_indices_in_sub_expanded_LIST, Nocc_LIST, mu0 - -def get_singlePointForces(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, rank, parts, partsCoreHalo, sy, hindex, forces, molecule_whole, P, P_contr, graph_for_pairs, graph_maskd): + return (EELEC, + eVal_LIST, + Q_LIST, + NH_Nh_Hs_LIST, + core_indices_in_sub_expanded_LIST, + Nocc_LIST, + mu0) + + +def get_singlePointForces(sdc, + eng, + partsPerGPU, + partsPerNode, + node_id, + node_rank, + rank, + parts, + partsCoreHalo, + sy, + hindex, + forces, + molecule_whole, + P, + P_contr, + graph_for_pairs, + graph_maskd): ''' - Function calculates forces on ALL atoms via backprop through an electronic energy a CH. Updates forces in-place. - Electronic energy is obtained from a "rectangular" hamiltonian. + Function calculates forces on ALL atoms via backprop through an electronic + energy a CH. Electronic energy is obtained from a "rectangular" hamiltonian + + ***UPDATES FORCES IN-PLACE*** + + Parameters + ---------- sdc: - eng: - partsPerGPU: number of CH processed by one rank. - partsPerNode: number of CH processed on one node. + The SEDACS driver. + eng: Engine + The SEDACS engine. + + partsPerGPU: + number of CH processed by one rank. + partsPerNode: + number of CH processed on one node. node_id: - node_rank: local rank on a node. E.g., for [0,1,2,3] [4,5,6,7], global rank 4 is local rank 0. - rank: global rank - If on CPU, all ranks are involved. gpu_comm is identical to master comm. - If on GPU and num_gpus (per node) == node_numranks, gpu_comm is identical to master comm. - If on GPU and num_gpus (per node) <= node_numranks, gpu_comm is different form master comm. For example, 8 ranks on two nodes [0,1,2,3] and [4,5,6,7] with 2 GPUs per node. In that case, only ranks [0,1] and [4,5] are involved. They, however, become [0,1] [2,3] within gpu_comm. - parts: list of cores - partsCoreHalo: list of core+halo + Current node identifier. + node_rank: + Local rank on a node. E.g., for [0,1,2,3] [4,5,6,7], + global rank 4 is local rank 0. + rank: + Global rank. 
+ gpu_comm: + Global communicator for ranks with GPU. + If on CPU, all ranks are involved. gpu_comm is identical to master comm + If on GPU and num_gpus (per node) == node_numranks, gpu_comm is + identical to master comm. + If on GPU and num_gpus (per node) <= node_numranks, gpu_comm is + different form master comm. For example, 8 ranks on two nodes [0,1,2,3] + and [4,5,6,7] with 2 GPUs per node. In that case, only ranks [0,1] and + [4,5] are involved. They, however, become [0,1] [2,3] within gpu_comm. + parts: + List of core indices. + partsCoreHalo: + List of core+halo indices. sy: + System object. hindex: - forces: array of atomic forces (n_atoms, 3) - molecule_whole: pyseqm molecule object - P: legacy, none - P_contr: contracted dm. (sy.nats, sdc.maxDeg, 4,4) - graph_for_pairs: graph of communities. E.g. graph_for_pairs[i] is a whole CH community in which atom i is a core atom, including itself. graph_for_pairs[i][0] is a community size - graph_maskd: diagonal mask for P_contr + Atom->orbtial index mapping. + mu0: + The chemical potential. + molecule_whole: + PYSEQM Molecule object. + P_contr: + Contracted density matrix. (sy.nats, sdc.maxDeg, 4, 4) + graph_for_pairs: + Graph of communities. E.g. graph_for_pairs[i] is a whole CH community + in which atom i is a core atom, including itself. graph_for_pairs[i][0] + is a community size. + graph_maskd: + Diagonal mask for the contracted density matrix, P_contr. + + Returns + ------- + EELEC: + The total electronic energy. ''' partIndex1 = (node_rank) * partsPerGPU + node_id*partsPerNode partIndex2 = (node_rank + 1) * partsPerGPU + node_id*partsPerNode EELEC = 0.0 + + # Loop over the partitions allocated to the current rank. for partIndex in range(partIndex1, partIndex2): - #print("Rank, part", rank, partIndex) + + # Extract the subsystem information. (CORE + HALO). subSy = System(len(partsCoreHalo[partIndex])) subSy.symbols = sy.symbols - subSy.coords, subSy.types = extract_subsystem(sy.coords, sy.types, sy.symbols, partsCoreHalo[partIndex]) + subSy.coords, subSy.types = extract_subsystem(sy.coords, + sy.types, + sy.symbols, + partsCoreHalo[partIndex]) + # Extract indices needed for generating the Hamiltonian. _, core_indices_in_sub_expanded, _ = \ - get_coreHalo_ham_inds(parts[partIndex], partsCoreHalo[partIndex], sdc, sy, subSy) + get_coreHalo_ham_inds(parts[partIndex], + partsCoreHalo[partIndex], + sdc, + sy, + subSy) tic = time.perf_counter() ham_timing = {} tmp_molecule_whole = copy.deepcopy(molecule_whole) if sdc.doForces: tmp_molecule_whole.coordinates.requires_grad_(True) - f, eElec = get_hamiltonian(sdc, eng,subSy.coords,subSy.types,subSy.symbols, - parts[partIndex], partsCoreHalo[partIndex], tmp_molecule_whole, P_contr, graph_for_pairs, graph_maskd, core_indices_in_sub_expanded, ham_timing, doForces = True, verbose=False) + + # Get hamiltonian with forces. + f, eElec = get_hamiltonian(sdc, + eng, + subSy.coords, + subSy.types, + subSy.symbols, + parts[partIndex], + partsCoreHalo[partIndex], + tmp_molecule_whole, + P_contr, + graph_for_pairs, + graph_maskd, + core_indices_in_sub_expanded, + ham_timing, + doForces=True, + verbose=False) + del tmp_molecule_whole + # Modify forces in-place. forces += f + + # Sum electronic energy. EELEC += eElec - #print("EelecCH {:>7.3f} eV |".format(eElec.item()), end=" ") + + # Print timing. 
ham_timing['TOT'] = time.perf_counter() - tic formatted_string = " | ".join(f"{key} {value:8.3f}" for key, value in ham_timing.items()) - print('Rank', rank, 'part', partIndex, ':', formatted_string, "|| EelecCH {:>7.3f} eV ||".format(eElec.item())) + print('Rank', rank, 'part', partIndex, ':', formatted_string, + "|| EelecCH {:>7.3f} eV ||".format(eElec.item())) del eElec, subSy, f - #print("TOT", time.perf_counter() - tic, "(s)") + return EELEC -def get_singlePointDM(sdc, eng, rank, node_numranks, node_comm, parts, partsCoreHalo, sy, hindex, mu0, P_contr, graph_for_pairs, - eValOnRank_list, Q_list, NH_Nh_Hs_list, core_indices_in_sub_expanded_list): - ''' - Function updates P_contr with core columns of CH dm. This is done in parallel on ALL local ranks of node 0. On CPU. That's slow. + +def get_singlePointDM(sdc, + eng, + rank, + node_numranks, + node_comm, + parts, + partsCoreHalo, + sy, + hindex, + mu0, + P_contr, + graph_for_pairs, + eValOnRank_list, + Q_list, + NH_Nh_Hs_list, + core_indices_in_sub_expanded_list): + """ + Function updates P_contr with core columns of CH dm. This is done in + parallel on ALL local ranks of node 0 on CPU. TODO: Improve the efficiency. + + Parameters + ---------- sdc: - eng: - rank: global rank - node_numranks: nomber of ranks on a node (loca ranks) - node_comm: local comminicator on a node - parts: list of cores - partsCoreHalo: list of core+halo + The SEDACS driver. + eng: Engine + The SEDACS engine. + rank: + Current global rank. + node_numranks: + Number of ranks on a node (local ranks). + node_comm: + Local comminicator on a node. + parts: + List of core indices. + partsCoreHalo: + List of core+halo indices. sy: - hindex: orbital index for each atom in the system - mu0: chemical potential - P_contr: contracted dm. (sy.nats, sdc.maxDeg, 4,4) - graph_for_pairs: graph of communities. E.g. graph_for_pairs[i] is a whole CH community in which atom i is a core atom, including itself. graph_for_pairs[i][0] is a community size - eValOnRank_list: eigenvalues of CHs. Here, for all CHs. - Q_list: eigenvectors of CHs. Here, only those used by this rank are present. - NH_Nh_Hs_list: list of [number_of_heavy_atoms, number_of_hydrogens, dim_of_coreHalo_ham]. Here, for all CHs. - core_indices_in_sub_expanded_list: indices of core columns in CH. E.g., CH[i] contains atoms [0,1,2,3], core atoms are [1,3], 4 AOs per atom. Then, core_indices_in_sub_expanded_list[i] is [4,5,6,7, 12,13,14,15]. - ''' - if rank == 0: print("eElec: {:>10.8f} | \u0394E| {:>10.8f}".format(sdc.EelecNew, abs(sdc.EelecNew - sdc.EelecOld)),) + System object. + hindex: + Atom->orbtial index mapping. + mu0: + The chemical potential. + P_contr: + Contracted density matrix. Shape: (sy.nats, sdc.maxDeg, 4,4) + graph_for_pairs: + Graph of communities. E.g. graph_for_pairs[i] is a whole CH community + in which atom i is a core atom, including itself. graph_for_pairs[i][0] + is a community size. + eValOnRank_list: + Eigenvalues of CHs. Here, for all CHs. + Q_list: + Eigenvectors of CHs. Here, only those used by this rank are present. + NH_Nh_Hs_list: + List of [number_of_heavy_atoms, number_of_hydrogens, + dim_of_coreHalo_ham]. Here, for all CHs. + core_indices_in_sub_expanded_list: + Indices of core columns in CH. E.g., CH[i] contains atoms [0,1,2,3], + core atoms are [1,3], 4 AOs per atom. Then, + core_indices_in_sub_expanded_list[i] is [4,5,6,7, 12,13,14,15]. + + Returns + ------- + graphOnRank: + Get connectivity graph for the dm of current CH. 
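    Notes
    -----
    A sketch of the update applied below to the core-column blocks of
    P_contr, with sdc.alpha as the mixing parameter (the P_core_* names are
    illustrative only):

        P_core_updated = (1 - sdc.alpha) * P_core_previous + sdc.alpha * P_core_CH

    where P_core_CH denotes the core columns of the freshly built core+halo
    density matrix rho_ren.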
+ """ + if rank == 0: + print("eElec: {:>10.8f} | \u0394E| {:>10.8f}" + .format(sdc.EelecNew, abs(sdc.EelecNew-sdc.EelecOld))) + sdc.EelecOld = sdc.EelecNew + # Parititona per rank and determine the partitions the current rank is + # responsible for. partsPerRank = int(sdc.nparts / node_numranks) partIndex1 = rank * partsPerRank partIndex2 = (rank + 1) * partsPerRank graphOnRank = None - maxDifList = [] sumDifTot = 0 P_contr_maxDifList = [] P_contr_sumDifTot = 0 - for partIndex, i in zip(range(partIndex1,partIndex2), range(partsPerRank)): - # this will calculate the DM in subsys and update the whole DM - # rho_ren is a dm contructed with electronic temperature. Its shaped into 4x4 blocks, even for hydrogen atoms, as required by pyseqm - rho_ren, maxDif, sumDif = get_density_matrix_renorm(sdc, eng, sdc.Tel, mu0, P_contr, graph_for_pairs, - eValOnRank_list[partIndex], Q_list[i], NH_Nh_Hs_list[partIndex], core_indices_in_sub_expanded_list[partIndex]) - indices_in_sub = np.linspace(0,len(partsCoreHalo[partIndex])-1, len(partsCoreHalo[partIndex]), dtype = eng.np_int_dt) # indices for CH dm. [0:n_atoms] - core_indices_in_sub = indices_in_sub[np.isin(partsCoreHalo[partIndex], parts[partIndex])] # core column blocks in CH dm (assuming its shaped as [n_atoms, n_atoms, 4, 4]) + + + for partIndex, i in zip(range(partIndex1, partIndex2), range(partsPerRank)): + + # This will calculate the DM in subsys and update the whole DM + # rho_ren is a dm contructed with electronic temperature. It's shaped + # into 4x4 blocks, even for hydrogen atoms, as required by PYSEQM. + + rho_ren, maxDif, sumDif = get_density_matrix_renorm(sdc, + eng, + sdc.Tel, + mu0, + P_contr, + graph_for_pairs, + eValOnRank_list[partIndex], + Q_list[i], + NH_Nh_Hs_list[partIndex], + core_indices_in_sub_expanded_list[partIndex]) + + indices_in_sub = np.linspace(0, + len(partsCoreHalo[partIndex])-1, + len(partsCoreHalo[partIndex]), + dtype=eng.np_int_dt) # Indices for CH DM. + + # Core column blocks in CH DM (assuming its shaped as: + # [n_atoms, n_atoms, 4, 4]) + core_indices_in_sub = indices_in_sub[np.isin(partsCoreHalo[partIndex], + parts[partIndex])] P_contr_maxDif = [] P_contr_sumDif = 0 - if sdc.UHF: # open shell - ### vectorized. Faster for larger cores. - max_len = graph_for_pairs[parts[partIndex][0]][0] - - TMP1 = P_contr[:, :max_len,parts[partIndex]] # get part of P_contr that corresponds to cores of current CH - TMP2 = rho_ren.reshape((1, 2, NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1],4, NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1],4)) \ - .transpose(3,4).reshape(2, (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]),4,4).transpose(3,4).transpose(1,2)[:,:,core_indices_in_sub] # get core column blocks of CH - P_contr_maxDif.append(torch.max(torch.abs(TMP1 - TMP2)).cpu().numpy()) # max difference in dm elements - P_contr_sumDif += torch.sum(torch.abs(TMP1 - TMP2)).cpu().numpy() # sum of abs differences between new and old dm. - P_contr[:,:max_len,parts[partIndex]] = (1-sdc.alpha)*TMP1 + sdc.alpha * TMP2 # update dm - - rho_ren = pack(rho_ren[0]+rho_ren[1], NH_Nh_Hs_list[partIndex][0], NH_Nh_Hs_list[partIndex][1]) # packing rho_ren from 4x4 blocks into normal form based on number of AOs per atom. + if sdc.UHF: # Open shell. - else: - ### vectorized. Faster for larger cores. + # Vectorized. Faster for larger cores. 
max_len = graph_for_pairs[parts[partIndex][0]][0] - TMP1 = P_contr[:max_len,parts[partIndex]] # get part of P_contr that corresponds to cores of current CH - TMP2 = rho_ren.reshape((1, NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1], 4, NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1],4)) \ - .transpose(2,3).reshape((NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]),4,4).transpose(2,3).transpose(0,1)[:,core_indices_in_sub] # get core column blocks of CH - - # print(TMP1, TMP2) - # np.save("TMP1_mergeattempt.npy", TMP1) - # np.save("TMP2_mergeattempt.npy", TMP2) - # exit(1) - P_contr_maxDif.append(torch.max(torch.abs(TMP1 - TMP2)).cpu().numpy()) # max difference in dm elements - P_contr_sumDif += torch.sum(torch.abs(TMP1 - TMP2)).cpu().numpy() # sum of abs differences between new and old dm. - P_contr[:max_len,parts[partIndex]] = (1-sdc.alpha)*TMP1 + sdc.alpha * TMP2 # update dm - - ### Loop. Faster for many small cores (?). - # for i in range(len(parts[partIndex])): - # tmp1 = P_contr[:graph_for_pairs[parts[partIndex][i]][0],parts[partIndex][i]] - # tmp2 = rho_ren.reshape((1, NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1],4, NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1],4)) \ - # .transpose(2,3).reshape((NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]),4,4)[core_indices_in_sub[i]].transpose(1,2) - - # P_contr_maxDif.append(torch.max(torch.abs(tmp1 - tmp2)).cpu().numpy()) - # P_contr_sumDif += torch.sum(torch.abs(tmp1 - tmp2)).cpu().numpy() - # P_contr[:graph_for_pairs[parts[partIndex][i]][0],parts[partIndex][i]] = (1-sdc.alpha)*tmp1 + sdc.alpha*tmp2 - # del tmp1, tmp2 - - rho_ren = pack(rho_ren, NH_Nh_Hs_list[partIndex][0], NH_Nh_Hs_list[partIndex][1]) # packing rho_ren from 4x4 blocks into normal form based on number of AOs per atom. - + # Get part of P_contr that corresponds to cores of current CH + TMP1 = P_contr[:, :max_len, parts[partIndex]] + + # Get core column blocks of CH + TMP2 = rho_ren.reshape((1, + 2, + NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1], + 4, + NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1], + 4)) \ + .transpose(3,4).reshape(2, + (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), + (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), + 4, + 4).transpose(3,4).transpose(1,2)[:,:,core_indices_in_sub] + + # Max difference in DM elements. + P_contr_maxDif.append(torch.max(torch.abs(TMP1 - TMP2)).cpu().numpy()) + + # Sum of abs differences between new and old DM. + P_contr_sumDif += torch.sum(torch.abs(TMP1 - TMP2)).cpu().numpy() + + # Update DM. + P_contr[:, :max_len, parts[partIndex]] = (1-sdc.alpha)*TMP1 + sdc.alpha * TMP2 + + # Packing rho_ren from 4x4 blocks into normal form based on number + # of AOs per atom. + rho_ren = pack(rho_ren[0]+rho_ren[1], + NH_Nh_Hs_list[partIndex][0], + NH_Nh_Hs_list[partIndex][1]) + + else: # Closed shell. See documentation in open-shell. + max_len = graph_for_pairs[parts[partIndex][0]][0] + TMP1 = P_contr[:max_len, parts[partIndex]] + TMP2 = rho_ren.reshape((1, + NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1], + 4, + NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1], + 4)) \ + .transpose(2,3).reshape((NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), + (NH_Nh_Hs_list[partIndex][0]+NH_Nh_Hs_list[partIndex][1]), + 4, + 4).transpose(2, 3).transpose(0, 1)[:, core_indices_in_sub] + + # Max difference in DM elements. 
+ P_contr_maxDif.append(torch.max(torch.abs(TMP1 - TMP2)).cpu().numpy()) + + # Sum of abs differences between new and old DM. + P_contr_sumDif += torch.sum(torch.abs(TMP1 - TMP2)).cpu().numpy() + + # Update DM. + P_contr[:max_len,parts[partIndex]] = (1-sdc.alpha)*TMP1 + sdc.alpha * TMP2 + + # Packing rho_ren from 4x4 blocks into normal form based on number + # of AOs per atom. + rho_ren = pack(rho_ren, + NH_Nh_Hs_list[partIndex][0], + NH_Nh_Hs_list[partIndex][1]) + + + # Total max difference. P_contr_maxDif = max(P_contr_maxDif) + + # Store max differences. P_contr_maxDifList.append(P_contr_maxDif) + + # Total of the summed differences. P_contr_sumDifTot += P_contr_sumDif + maxDifList.append(maxDif) try: sumDifTot += sumDif except: sumDifTot += 0 - graphOnRank = collect_graph_from_rho_PYSEQM(graphOnRank, rho_ren, sdc.gthresh, sy.nats, sdc.maxDeg, partsCoreHalo[partIndex], hindex, verb=False) # get connectivity graph for the dm of current CH + + # Get connectivity graph for the dm of current CH + graphOnRank = collect_graph_from_rho_PYSEQM(graphOnRank, + rho_ren, + sdc.gthresh, + sy.nats, + sdc.maxDeg, + partsCoreHalo[partIndex], + hindex, + verb=False) del rho_ren - print('HERE_DM_1') + # Logging. print(" MAX |\u0394DM_ij|: {:>10.7f} at SubSy {:>5d}".format(max(P_contr_maxDifList), np.argmax(P_contr_maxDifList))) print(" \u03A3 |\u0394DM_ij|: {:>10.7f}".format(P_contr_sumDifTot)) + return graphOnRank -def print_memory_usage(rank, node_rank, message): + +def print_memory_usage(rank, node_rank, message) -> None: + """ + Prints the memory usage on the desired rank. + """ process = psutil.Process() mem_info = process.memory_info() print(f"{message} | Rank: {rank}, Node Rank: {node_rank}, Memory Usage: {mem_info.rss / (1024 ** 2):.2f} MB") + + def tensor_size(tensor): + """ + Returns the size of the desired tensor. + """ return tensor.element_size() * tensor.nelement() / (1024 ** 2) -# Collect all tensors in the current environment -def get_tensors(): + + +def get_tensors() -> torch.Tensor: + """ + Generator which collects all tensors in the current environment. + Relies on the Python garbage collector for persistence. Yields tensors + one-by-one. + """ for obj in gc.get_objects(): try: if torch.is_tensor(obj): yield obj except Exception as e: pass -def print_attribute_sizes(obj): + + +def print_attribute_sizes(obj) -> None: + """ + Prints size of attributes of the input object. + """ for attr in dir(obj): # Skip private or callable attributes if attr.startswith("_") or callable(getattr(obj, attr)): continue + attribute = getattr(obj, attr) + + # Calculate this size in memory of attributable to the reference of the + # object attribute. size_bytes = attribute.nbytes if isinstance(attribute, np.ndarray) else attribute.element_size() * attribute.nelement() if isinstance(attribute, torch.Tensor) else sys.getsizeof(attribute) size_mb = size_bytes / (1024 ** 2) # Convert bytes to MB print(f"{attr}: {size_mb:.2f} MB") -def get_adaptiveDM_PYSEQM(sdc, eng, comm, rank, numranks, sy, hindex, graphNL): - ''' +def get_adaptiveDM_PYSEQM(sdc, + eng, + comm, + rank, + numranks, + sy, + hindex, + graphNL): + """ The main driver function. It initializes supplementary comms, dm, graphs, performs scf cycle with graph and dm updates, and then computes forces. 
sdc: eng: @@ -799,7 +1152,7 @@ def get_adaptiveDM_PYSEQM(sdc, eng, comm, rank, numranks, sy, hindex, graphNL): sy: hindex: orbital index for each atom in the system graphNL: initial connectivity graph - ''' + """ t_INIT = time.perf_counter() # time for scf initialization tic = time.perf_counter() sdc.EelecOld = 0.0 From 237dc814cdd64ab4078cb0a310985911654c2246 Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 13 May 2025 13:47:42 -0600 Subject: [PATCH 4/5] graph_adaptive.py driver comments done --- src/sedacs/driver/graph_adaptive.py | 1003 ++++++++++++++++++++------- 1 file changed, 766 insertions(+), 237 deletions(-) diff --git a/src/sedacs/driver/graph_adaptive.py b/src/sedacs/driver/graph_adaptive.py index c9e84cf..429ee31 100644 --- a/src/sedacs/driver/graph_adaptive.py +++ b/src/sedacs/driver/graph_adaptive.py @@ -227,7 +227,7 @@ def get_singlePoint(sdc, subSy, device=P_contr.device) - # Get CH hamiltonian. If on GPU, temporarily redefine molecule_whole + # Get CH hamiltonian. If on GPU, temporarily redefine molecule_whole # on GPU (its faster than transfering) and send P_contr to GPU. The # idea is to have P_contr in shared memory on each node, then update in # parallel on node 0. If initialized on GPU, then well need to transfer @@ -247,7 +247,7 @@ def get_singlePoint(sdc, do_large_tensors=sdc.use_pyseqm_lt, device=device)[0] - # Get the Hamiltonian using the PYSEQM Molecule object for this + # Get the Hamiltonian using the PYSEQM Molecule object for this # Core+Halo subsystem. ham, eElec = get_hamiltonian(sdc, eng, @@ -407,17 +407,21 @@ def get_singlePoint(sdc, Q_LIST = None if node_rank == 0: - print("| t commLists {:>9.4f} (s)".format(time.perf_counter() - tic), - rank) + print("| t commLists {:>9.4f} (s)" + .format(time.perf_counter() - tic), rank) if rank == 0: tic = time.perf_counter() mu0 = get_mu(mu0, full_dVals, full_eVals, sdc.Tel, sy.numel/2) # oldR print("Time mu0 {:>9.4f} (s)".format(time.perf_counter() - tic)) - return (EELEC, eVal_LIST, Q_LIST, - NH_Nh_Hs_LIST, core_indices_in_sub_expanded_LIST, - Nocc_LIST, mu0) + return (EELEC, + eVal_LIST, + Q_LIST, + NH_Nh_Hs_LIST, + core_indices_in_sub_expanded_LIST, + Nocc_LIST, + mu0) def get_singlePoint_charges(sdc, @@ -598,61 +602,125 @@ def get_singlePoint_charges(sdc, subSyCore.types, subSyCore.symbols) - core_indices_in_sub, core_indices_in_sub_expanded, hindex_sub = \ - get_coreHalo_ham_inds(parts[partIndex], partsCoreHalo[partIndex], sdc, sy, subSy, device=P_contr.device) - - # Get CH hamiltonian. If on GPU, temporarily redefine molecule_whole on GPU (its faster than transfering) and send P_contr to GPU. - # The idea is to have P_contr in shared memory on each node, then update in parallel on node 0. - # If initialized on GPU, then we'll need to transfer it back into shared memory CPU to update in parallel. Ideally, this needs to be fixed. + get_coreHalo_ham_inds(parts[partIndex], + partsCoreHalo[partIndex], + sdc, + sy, + subSy, + device=P_contr.device) + + # Get CH hamiltonian. If on GPU, temporarily redefine molecule_whole on + # GPU (its faster than transfering) and send P_contr to GPU. The idea + # is to have P_contr in shared memory on each node, then update input + # in parallel on node 0. If initialized on GPU, then we'll need to + # transfer it back into shared memory CPU to update in parallel. + # Ideally, this needs to be fixed. 
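        # A minimal sketch (not part of this patch) of the node-local shared
        # memory mentioned above, using mpi4py shared-memory windows. It
        # assumes a float64 buffer of n_elem entries and a node-level
        # communicator node_comm (names are placeholders here):
        #
        #     itemsize = MPI.DOUBLE.Get_size()
        #     nbytes = n_elem * itemsize if node_comm.Get_rank() == 0 else 0
        #     win = MPI.Win.Allocate_shared(nbytes, itemsize, comm=node_comm)
        #     buf, itemsize = win.Shared_query(0)
        #     P_shared = np.ndarray(buffer=buf, dtype=np.float64, shape=(n_elem,))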
ham_timing = {} if sdc.scfDevice == 'cuda': device = 'cuda:{}'.format(node_rank) - tmp_molecule_whole = get_molecule_pyseqm(sdc, sy.coords, sy.symbols, sy.types, do_large_tensors = sdc.use_pyseqm_lt, device=device)[0] - ham, eElec = get_hamiltonian(sdc, eng, subSy.coords, subSy.types, subSy.symbols, - parts[partIndex], partsCoreHalo[partIndex], tmp_molecule_whole, P_contr.to(device), graph_for_pairs, graph_maskd, core_indices_in_sub_expanded, ham_timing, verbose=False) + + # Get the temporary PYSEQM Molecule object for this subsystem. + # Simply redefine on the proper device rather than transferring + # molecule_whole from CPU. + tmp_molecule_whole = get_molecule_pyseqm(sdc, + sy.coords, + sy.symbols, + sy.types, + do_large_tensors=sdc.use_pyseqm_lt, + device=device)[0] + # Get the Hamiltonian using the PYSEQM Molecule object for this + # Core+Halo subsystem. + ham, eElec = get_hamiltonian(sdc, + eng, + subSy.coords, + subSy.types, + subSy.symbols, + parts[partIndex], + partsCoreHalo[partIndex], + tmp_molecule_whole, + P_contr.to(device), + graph_for_pairs, + graph_maskd, + core_indices_in_sub_expanded, + ham_timing, + verbose=False) del tmp_molecule_whole + else: device = 'cpu' - ham, eElec = get_hamiltonian(sdc, eng,subSy.coords,subSy.types,subSy.symbols, - parts[partIndex], partsCoreHalo[partIndex], molecule_whole, P_contr, graph_for_pairs, graph_maskd, core_indices_in_sub_expanded, ham_timing, verbose=False) - EELEC += eElec + # Get the Hamiltonian using the PYSEQM Molecule object for this + # Core+Halo subsystem. + ham, eElec = get_hamiltonian(sdc, eng, + subSy.coords, + subSy.types, + subSy.symbols, + parts[partIndex], + partsCoreHalo[partIndex], + molecule_whole, + P_contr, + graph_for_pairs, + graph_maskd, + core_indices_in_sub_expanded, + ham_timing, + verbose=False) - #print("TOT {:>8.3f} (s)".format(time.perf_counter() - tic)) + EELEC += eElec # Get eVals, dVals tic = time.perf_counter() norbs = subSy.nats occ = int(float(norbs) / 2.0) # Get the total occupied orbitals. Not used. coreSize = len(parts[partIndex]) - eVals, dVals, Q, NH_Nh_Hs = get_eVals(eng, sdc, sy, ham, subSy.coords, subSy.symbols, subSy.types, sdc.Tel, mu0, - core_indices_in_sub, core_indices_in_sub_expanded, hindex_sub, - coreSize, subSy, subSyCore, parts[partIndex], partsCoreHalo[partIndex], verbose=False) + eVals, dVals, Q, NH_Nh_Hs = get_eVals(eng, + sdc, + sy, + ham, + subSy.coords, + subSy.symbols, + subSy.types, + sdc.Tel, + mu0, + core_indices_in_sub, + core_indices_in_sub_expanded, + hindex_sub, + coreSize, + subSy, + subSyCore, + parts[partIndex], + partsCoreHalo[partIndex], + verbose=False) del ham + # Append eVals/eVals depending on if open/closed shell. if sdc.UHF: dValOnRank = np.append(dValOnRank, dVals, axis=1) eValOnRank = np.append(eValOnRank, eVals.cpu().numpy(), axis=1) else: dValOnRank = np.append(dValOnRank, dVals) eValOnRank = np.append(eValOnRank, eVals.cpu().numpy()) + + # Append eVals/dVals/eigenvectors local to this rank. eValOnRank_list.append(eVals.cpu()) dValOnRank_list.append(dVals) + + # See above documentation for the explanation of all of these lists. + # These effective collect the current rank's ifnromation for the above- + # mentioned quantities. Q_list.append(Q.cpu()) core_indices_in_sub_expanded_list.append(core_indices_in_sub_expanded) NH_Nh_Hs_list.append(NH_Nh_Hs) Nocc_list.append(occ) - #print("| t eVals/dVals {:>9.4f} (s)".format(time.perf_counter() - tic)) + + # Timing on getting the Hamiltonians. 
ham_timing['eVals/dVals'] = time.perf_counter() - tic ham_timing['TOT'] = time.perf_counter() - ticHam - #ham_timing_formatted = {key: f"{value:8.3f}" for key, value in ham_timing.items()} formatted_string = " | ".join(f"{key} {value:8.3f}" for key, value in ham_timing.items()) print('Rank', rank, 'part', partIndex, ':', formatted_string) - ########## - ########## + # Time the MPI collection of eigenvectors. tic = time.perf_counter() if rank != 0: gpu_comm.send(Q_list, dest=0, tag=0) @@ -660,12 +728,8 @@ def get_singlePoint_charges(sdc, Q_LIST = [Q_list] for i in range(1, gpu_comm.Get_size()): Q_LIST.append(gpu_comm.recv(source=i, tag=0)) - print("Time Q_LIST send/recv {:>9.4f} (s)".format(time.perf_counter() - tic), rank) - - # tic = time.perf_counter() - # torch.save(Q_list, 'Q/Q_list_{}.pt'.format(rank)) #### oldR - # print("Time to save Q_list {:>9.4f} (s)".format(time.perf_counter() - tic), rank) - + print("Time Q_LIST send/recv {:>9.4f} (s)" + .format(time.perf_counter() - tic), rank) tic = time.perf_counter() full_dVals = None @@ -676,15 +740,21 @@ def get_singlePoint_charges(sdc, if rank == 0: eValOnRank_SIZES = np.empty(gpu_comm.Get_size(), dtype=int) + # Gather eigenvalues. gpu_comm.Gather(eValOnRank_size, eValOnRank_SIZES, root=0) + + # Set up full containers on Rank 0. if rank == 0: + if sdc.UHF: full_dVals = np.empty((2, np.sum(eValOnRank_SIZES)), dtype=eValOnRank.dtype) recvcounts = [2 * size for size in eValOnRank_SIZES] + else: full_dVals = np.empty(np.sum(eValOnRank_SIZES), dtype=eValOnRank.dtype) full_eVals = np.empty(np.sum(eValOnRank_SIZES), dtype=eValOnRank.dtype) #### oldR + # Gather the actual values. if sdc.UHF: dValOnRank_flat = dValOnRank.flatten() gpu_comm.Gatherv( @@ -695,20 +765,17 @@ def get_singlePoint_charges(sdc, gpu_comm.Gatherv(dValOnRank, [full_dVals, eValOnRank_SIZES], root=0) gpu_comm.Gatherv(eValOnRank, [full_eVals, eValOnRank_SIZES], root=0) #### oldR + # These are the FULL versions of the corresponding lists above. See the + # above documentation for which all of these lists are explained. eVal_LIST = gpu_comm.gather(eValOnRank_list, root=0) dVal_LIST = gpu_comm.gather(dValOnRank_list, root=0) NH_Nh_Hs_LIST = gpu_comm.gather(NH_Nh_Hs_list, root=0) core_indices_in_sub_expanded_LIST = gpu_comm.gather(core_indices_in_sub_expanded_list, root=0) Nocc_LIST = gpu_comm.gather(Nocc_list, root=0) - ####### - ####### + # Flatten the nested list of lists into a single list of tensors. + # There is one tensor per CH partition. if rank == 0: - # Q_LIST = [] - # for i in range(gpu_comm.Get_size()): - # Q_LIST.append(torch.load('Q/Q_list_{}.pt'.format(i))) - - # Flatten the nested list of lists into a single list of tensors. One tensor per CH. 
         eVal_LIST = list(itertools.chain(*eVal_LIST))
         dVal_LIST = list(itertools.chain(*dVal_LIST))
         Q_LIST = list(itertools.chain(*Q_LIST))
@@ -717,15 +784,14 @@ def get_singlePoint_charges(sdc,
         Nocc_LIST = list(itertools.chain(*Nocc_LIST))
     else:
         Q_LIST = None
-    if node_rank == 0: print("| t commLists {:>9.4f} (s)".format(time.perf_counter() - tic), rank)
+
+    if node_rank == 0:
+        print("| t commLists {:>9.4f} (s)"
+              .format(time.perf_counter() - tic), rank)
 
     if rank == 0:
         tic = time.perf_counter()
-        #######
-        #######
-        mu0 = get_mu(mu0, full_dVals, full_eVals, sdc.Tel, sy.numel/2) #### oldR
-
-        #mu0 = get_mu(mu0, np.concatenate(dVal_LIST, axis = -1), torch.cat(eVal_LIST, dim = -1), sdc.Tel, sy.nocc) # chemical potential calculation
+        mu0 = get_mu(mu0, full_dVals, full_eVals, sdc.Tel, sy.numel/2)
         print("Time mu0 {:>9.4f} (s)".format(time.perf_counter() - tic))
 
     return (EELEC,
@@ -948,7 +1014,7 @@ def get_singlePointDM(sdc,
 
     sdc.EelecOld = sdc.EelecNew
 
-    # Parititona per rank and determine the partitions the current rank is
+    # Partition per rank and determine the partitions the current rank is
     # responsible for.
     partsPerRank = int(sdc.nparts / node_numranks)
     partIndex1 = rank * partsPerRank
@@ -965,7 +1031,6 @@ def get_singlePointDM(sdc,
         # This will calculate the DM in subsys and update the whole DM
         # rho_ren is a dm contructed with electronic temperature. It's shaped
         # into 4x4 blocks, even for hydrogen atoms, as required by PYSEQM.
-
         rho_ren, maxDif, sumDif = get_density_matrix_renorm(sdc,
                                                             eng,
                                                             sdc.Tel,
@@ -1143,49 +1208,79 @@ def get_adaptiveDM_PYSEQM(sdc,
                             hindex,
                             graphNL):
     """
-    The main driver function. It initializes supplementary comms, dm, graphs, performs scf cycle with graph and dm updates, and then computes forces.
+    The main driver function. It initializes supplementary comms, dm, graphs,
+    performs scf cycle with graph and dm updates, and then computes forces.
+    sdc:
+        SEDACS driver.
     eng:
-    comm: master communicator
-    rank: global rank
-    numranks: number of global ranks
+        SEDACS Engine.
+    comm:
+        Master MPI communicator.
+    rank:
+        The global rank.
+    numranks:
+        The number of global ranks.
     sy:
-    hindex: orbital index for each atom in the system
-    graphNL: initial connectivity graph
+        The full SEDACS System.
+    hindex:
+        Orbital indices for each atom in the system.
+    graphNL:
+        Initial connectivity graph
     """
-    t_INIT = time.perf_counter() # time for scf initialization
+
+    # SCF Initialization time.
+    t_INIT = time.perf_counter()
     tic = time.perf_counter()
     sdc.EelecOld = 0.0
+    # Whether or not to use large tensors. Prohibitively large for big systems.
+    eng.use_pyseqm_lt = False
-    eng.use_pyseqm_lt = False # flag for using pyseqm "large tensors", e.g. ij vectors, masks, etc. These are extremely large for large systems.
-    # If False, pyseqm won't compute them and sedacs will coumpute only necessary subparts.
+    # If False, pyseqm won't compute them and sedacs will compute only
+    # the necessary subparts.
     sdc.use_pyseqm_lt = eng.use_pyseqm_lt
 
-    eng.reconstruct_dm = False # Flag for reconstructing the whole dm. For debug purposes only.
+    # Reconstructs the full density matrix for debugging purposes.
+    eng.reconstruct_dm = False
     sdc.reconstruct_dm = eng.reconstruct_dm
 
-    ### SETUP COMMS ###
-    node_comm = comm.Split_type(MPI.COMM_TYPE_SHARED) # local communicator for ranks on a node
-    node_rank = node_comm.Get_rank() # Local rank on a node
-    node_numranks = node_comm.Get_size() # Communicator size on a node
+    # //Set up the MPI communicators.
+
+    # Local communicator for ranks on a given node.
+ node_comm = comm.Split_type(MPI.COMM_TYPE_SHARED) + + # The local rank on a given node. + node_rank = node_comm.Get_rank() + # The size of a communicator on a given node. + node_numranks = node_comm.Get_size() + + # Get relevant information specific to the nodes the job has been + # allocated. node_name = socket.gethostname() node_names = comm.allgather(node_name) unique_nodes = list(set(node_names)) - num_nodes = len(unique_nodes) # Total number of unique nodes + num_nodes = len(unique_nodes) node_id = int(rank/node_numranks) - # Get primary ranks on each node. E.g. when running 16 ranks on two nodes, these are ranks 0 and 8. + # Get primary ranks on each node. + # E.g. when running 16 ranks on two nodes, these are ranks 0 and 8. + # [0, 1, 2, 3, 4, 5, 6, 7], [8, 9, 10, 11, 12, 13, 14, 15] primary_rank = None if node_rank == 0: primary_rank = rank # Global rank of the primary rank on each node + # Gather the primary ranks from each node primary_ranks = comm.allgather(primary_rank) - primary_ranks = [r for r in primary_ranks if r is not None] # Filter out None values + + # Filter out Nones. + primary_ranks = [r for r in primary_ranks if r is not None] color = 0 if rank in primary_ranks else MPI.UNDEFINED - primary_comm = comm.Split(color=color, key=rank) # Communicator for primary ranks + + # Communicator for primary ranks. + primary_comm = comm.Split(color=color, key=rank) device = 'cpu' if sdc.scfDevice == 'cuda' and sdc.numGPU == -1: @@ -1199,20 +1294,33 @@ def get_adaptiveDM_PYSEQM(sdc, num_gpus = node_numranks color = 0 if node_rank < num_gpus else MPI.UNDEFINED - gpu_comm = comm.Split(color=color, key=rank) # global communicator for ranks with GPU. Identical to comm if running on CPU. - partsPerGPU = int(sdc.nparts / (num_gpus*num_nodes)) # assume all nodes have same number of GPUs! - partsPerNode = int(sdc.nparts / num_nodes) # How many CH are processed by each node. + + # Global communicator for ranks with GPU. + # Identical to comm if running on CPU. + gpu_comm = comm.Split(color=color, key=rank) + + # Assume all nodes have same number of GPUs! + partsPerGPU = int(sdc.nparts / (num_gpus*num_nodes)) + + # How many CH are processed by each node. + partsPerNode = int(sdc.nparts / num_nodes) gpu_global_rank = None + + # Global rank of the primary rank on each node. if node_rank < num_gpus: - gpu_global_rank = rank # Global rank of the primary rank on each node + gpu_global_rank = rank - # Gather the primary ranks from each node + # Gather the primary ranks from each node. gpu_global_ranks = comm.allgather(gpu_global_rank) - gpu_global_ranks = [r for r in gpu_global_ranks if r is not None] # Filter out None values - ### END SETUP COMMS ### - # Some data type info for numpy and torch. Double precision is necessary for pyseqm. + # Filter out Nones. + gpu_global_ranks = [r for r in gpu_global_ranks if r is not None] + + # //Communicator setup finished. + + # Some data type info for numpy and torch. + # Double precision is necessary for pyseqm. if torch.get_default_dtype() == torch.float32: eng.torch_dt = torch.float32 sdc.torch_dt = eng.torch_dt @@ -1240,59 +1348,138 @@ def get_adaptiveDM_PYSEQM(sdc, fullGraph = graphNL.copy() - # get pyseqm molecule object + # Get PYSEQM Molecule object. 
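A condensed, standalone sketch of the communicator layout built above: a node-local shared-memory communicator plus a communicator that keeps only one "primary" rank per node. It assumes mpi4py and is illustrative rather than the SEDACS setup itself.

import socket
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

node_comm = comm.Split_type(MPI.COMM_TYPE_SHARED)    # ranks sharing one node
node_rank = node_comm.Get_rank()

num_nodes = len(set(comm.allgather(socket.gethostname())))

color = 0 if node_rank == 0 else MPI.UNDEFINED       # keep one rank per node
primary_comm = comm.Split(color=color, key=rank)     # MPI.COMM_NULL elsewhere

if primary_comm != MPI.COMM_NULL:
    print("rank", rank, "is a primary rank;", num_nodes, "node(s) total")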
tic = time.perf_counter() with torch.no_grad(): - molecule_whole = get_molecule_pyseqm(sdc, sy.coords, sy.symbols, sy.types, do_large_tensors = sdc.use_pyseqm_lt, device=device)[0] - #print_attribute_sizes(molSysData.molecule_whole) - if rank == 0: print("Time to init molSysData {:>7.2f} (s)".format(time.perf_counter() - tic), rank) + molecule_whole = get_molecule_pyseqm(sdc, + sy.coords, + sy.symbols, + sy.types, + do_large_tensors=sdc.use_pyseqm_lt, + device=device)[0] + + if rank == 0: + print("Time to init molSysData {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) + + # //Initialization for SCF - ### Initialization for SCF ### # Things that are calculated on each primary rank of each node. if node_rank == 0: tic = time.perf_counter() - if rank == 0: print('Computing cores.') - parts = graph_partition(sdc, eng, fullGraph, sdc.partitionType, sdc.nparts, sy.coords, sdc.verb) # cores - if rank == 0: print("Time to compute cores {:>7.2f} (s)".format(time.perf_counter() - tic), rank) + if rank == 0: + print('Computing cores.') + + # Core partitions. + parts = graph_partition(sdc, + eng, + fullGraph, + sdc.partitionType, + sdc.nparts, + sy.coords, + sdc.verb) + if rank == 0: + print("Time to compute cores {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) tic = time.perf_counter() - if rank == 0: print('Loading the molecule and parameters.') - + if rank == 0: + print('Loading the molecule and parameters.') + partsCoreHalo = [] if rank == 0: print('\n|||| Adaptive iter:', 0, '||||') print("Core and halos indices for every part:") for i in range(sdc.nparts): - coreHalo, nc = get_coreHaloIndicesPYSEQM(eng, parts[i], fullGraph, sdc.numJumps, sdc, sy) # halos + # Halos. + coreHalo, nc = get_coreHaloIndicesPYSEQM(eng, + parts[i], + fullGraph, + sdc.numJumps, + sdc, + sy) partsCoreHalo.append(coreHalo) - if sdc.verb: print("coreHalo for part", i, "=", coreHalo) - if rank == 0: print(' N atoms in core/coreHalo {:>6d} : {:>6d} {:>6d}'.format(i, len(parts[i]), len(coreHalo))) - print("Time to compute halos {:>7.2f} (s)".format(time.perf_counter() - tic), rank) + if sdc.verb: + print("coreHalo for part", i, "=", coreHalo) + if rank == 0: + print(' N atoms in core/coreHalo {:>6d} : {:>6d} {:>6d}' + .format(i, len(parts[i]), len(coreHalo))) + + print("Time to compute halos {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) tic = time.perf_counter() - new_graph_for_pairs = get_ch_graph(sdc, sy, fullGraph, parts, partsCoreHalo) # Graph where new_graph_for_pairs[i] is a CH in which atom i is a core atom. new_graph_for_pairs[i][0] is the size of CH. - graph_for_pairs = new_graph_for_pairs # Here, same as new_graph_for_pairs - graph_maskd = get_maskd(sdc, sy, graph_for_pairs) # mask for diagonal block in contracted dm - print("Time to init mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic), rank) + + # Graph where new_graph_for_pairs[i] is a CH in which atom i is a core + # atom. new_graph_for_pairs[i][0] is the size of CH. + new_graph_for_pairs = get_ch_graph(sdc, + sy, + fullGraph, + parts, + partsCoreHalo) + + # Here, same as new_graph_for_pairs. + graph_for_pairs = new_graph_for_pairs + + # Mask for diagonal block in contracted density matrix. 
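get_coreHaloIndicesPYSEQM() is defined elsewhere; purely as an illustration of the core+halo idea used above, the sketch below expands a core by numJumps graph hops, assuming the count-then-neighbors row layout that fullGraph uses elsewhere in this driver.

import numpy as np

def expand_core_halo(core, graph, num_jumps):
    # Repeatedly add graph neighbors of the current frontier to the set.
    core_halo, frontier = set(core), set(core)
    for _ in range(num_jumps):
        new_frontier = set()
        for i in frontier:
            n = graph[i, 0]
            new_frontier.update(graph[i, 1:1 + n].tolist())
        new_frontier -= core_halo
        core_halo |= new_frontier
        frontier = new_frontier
    return sorted(core_halo)

# Tiny 4-atom chain 0-1-2-3 in count-then-neighbors rows.
g = np.array([[1, 1, 0, 0],
              [2, 0, 2, 0],
              [2, 1, 3, 0],
              [1, 2, 0, 0]])
print(expand_core_halo([1], g, 1))   # -> [0, 1, 2]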
+ graph_maskd = get_maskd(sdc, sy, graph_for_pairs) + + print("Time to init mod graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) tic = time.perf_counter() if sdc.UHF: - P_contr = torch.zeros(2, sy.nats*sdc.maxDeg,4,4, dtype=eng.torch_dt, device=device) # contracted density matrix - P_contr[:,graph_maskd] = 0.5*get_diag_guess_pyseqm(molecule_whole, sy) # diagonal initial guess - #P_contr = P_contr * torch.tensor([sy.nocc_alpha/(sy.numel/2), sy.nocc_beta/(sy.numel/2)], device = device).view(2, 1, 1, 1) - P_contr = P_contr.reshape(2, sy.nats, sdc.maxDeg, 4,4).transpose(1,2) # (2, n_atoms, max_deg, 4, 4). A rectangle of 4x4 square blocks. + + # Contracted density matrix. + P_contr = torch.zeros(2, + sy.nats*sdc.maxDeg, + 4, + 4, + dtype=eng.torch_dt, + device=device) + + # Diagonal initial guess. + P_contr[:, graph_maskd] = 0.5*get_diag_guess_pyseqm(molecule_whole, + sy) + + # Shape is: (2, n_atoms, max_deg, 4, 4). + # A rectangle of 4x4 square blocks. + P_contr = P_contr.reshape(2, + sy.nats, + sdc.maxDeg, + 4, + 4).transpose(1, 2) P_contr_size = P_contr.size() P_contr_nbytes = P_contr.numel() * P_contr.element_size() + else: - P_contr = torch.zeros(sy.nats*sdc.maxDeg,4,4, dtype=eng.torch_dt, device=device) # contracted density matrix - P_contr[graph_maskd] = get_diag_guess_pyseqm(molecule_whole, sy) # diagonal initial guess - P_contr = P_contr.reshape(sy.nats, sdc.maxDeg, 4,4).transpose(0,1) # (n_atoms, max_deg, 4, 4). A rectangle of 4x4 square blocks. + # Contracted density matrix. + P_contr = torch.zeros(sy.nats*sdc.maxDeg, + 4, + 4, + dtype=eng.torch_dt, + device=device) + + # Diagonal initial guess. + P_contr[graph_maskd] = get_diag_guess_pyseqm(molecule_whole, sy) + + # Shape is: (n_atoms, max_deg, 4, 4). + # A rectangle of 4x4 square blocks. + P_contr = P_contr.reshape(sy.nats, + sdc.maxDeg, + 4, + 4).transpose(0, 1) + P_contr_size = P_contr.size() P_contr_nbytes = P_contr.numel() * P_contr.element_size() - print("Time to init DM {:>7.2f} (s)".format(time.perf_counter() - tic), rank) - # graphNL = collect_graph_from_rho(None, sdc.overlap_whole, sdc.gthreshinit, sy.nats, sdc.maxDeg, [i for i in range(0,sy.nats)],hindex) + + print("Time to init DM {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) + del graphNL + else: + parts = None sdc.nparts = None @@ -1307,24 +1494,44 @@ def get_adaptiveDM_PYSEQM(sdc, P_contr = None P_contr_size = None P_contr_nbytes = 0 - + tic = time.perf_counter() parts = node_comm.bcast(parts, root=0) sdc.nparts = node_comm.bcast(sdc.nparts, root=0) - if rank == 0: print("BCST1 {:>7.2f} (s)".format(time.perf_counter() - tic), rank) + + if rank == 0: + print("BCST1 {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) tic = time.perf_counter() - # P_contr is in shared memory between ranks on one node but each node has its own copy. + # P_contr is in shared memory between ranks on one node + # but each node has its own copy. 
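The node-local sharing used for P_contr in the next hunk follows the standard mpi4py shared-window pattern; here is a self-contained sketch with a small float64 array (names are illustrative).

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
node_comm = comm.Split_type(MPI.COMM_TYPE_SHARED)
node_rank = node_comm.Get_rank()

shape = (4, 4)
itemsize = np.dtype(np.float64).itemsize
nbytes = int(np.prod(shape)) * itemsize if node_rank == 0 else 0

win = MPI.Win.Allocate_shared(nbytes, itemsize, comm=node_comm)
buf, _ = win.Shared_query(0)                  # everyone maps rank 0's block
shared = np.ndarray(buffer=buf, dtype=np.float64, shape=shape)

if node_rank == 0:
    shared[:] = 1.0                           # filled once per node
node_comm.Barrier()
print(node_rank, shared.sum())                # every rank on the node sees 16.0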
     P_contr_size = node_comm.bcast(P_contr_size, root=0)
-    P_contr_win = MPI.Win.Allocate_shared(P_contr_nbytes, torch.tensor(0, dtype=eng.torch_dt).element_size(), comm=node_comm) # 8 is the size of torch.float64
-    P_contr_buf, P_contr_itemsize = P_contr_win.Shared_query(0)
-    P_contr_ary = np.ndarray(buffer=P_contr_buf, dtype=eng.np_dt, shape=(P_contr_size))
+
+    # The displacement unit is the element size of the torch dtype
+    # (8 bytes for torch.float64).
+    P_contr_win = MPI.Win.Allocate_shared(P_contr_nbytes,
+                                          torch.tensor(0,
+                                                       dtype=eng.torch_dt)
+                                          .element_size(), comm=node_comm)
+
+    P_contr_buf, P_contr_itemsize = P_contr_win.Shared_query(0)
+
+    P_contr_ary = np.ndarray(buffer=P_contr_buf,
+                             dtype=eng.np_dt,
+                             shape=(P_contr_size))
+
     if node_rank == 0:
-        P_contr_ary[:] = P_contr.cpu().numpy()
+        P_contr_ary[:] = P_contr.cpu().numpy()
+
     comm.Barrier()
+
     del P_contr
+
     P_contr = torch.from_numpy(P_contr_ary).to(device)
-    if rank == 0: print("BCST2 {:>7.2f} (s)".format(time.perf_counter() - tic), rank)
+
+    if rank == 0:
+        print("BCST2 {:>7.2f} (s)"
+              .format(time.perf_counter() - tic), rank)
 
     tic = time.perf_counter()
     fullGraph = node_comm.bcast(fullGraph, root=0)
@@ -1332,54 +1539,118 @@ def get_adaptiveDM_PYSEQM(sdc,
     partsCoreHalo = node_comm.bcast(partsCoreHalo, root=0)
     graph_maskd = node_comm.bcast(graph_maskd, root=0)
     graph_for_pairs = node_comm.bcast(graph_for_pairs, root=0)
-    if rank == 0: print("BCST3 {:>7.2f} (s)".format(time.perf_counter() - tic), rank)
-    print("Time to init bcast and share DM {:>7.2f} (s)".format(time.perf_counter() - tic), rank)
+    if rank == 0:
+        print("BCST3 {:>7.2f} (s)"
+              .format(time.perf_counter() - tic), rank)
+
+    print("Time to init bcast and share DM {:>7.2f} (s)"
+          .format(time.perf_counter() - tic), rank)
 
     if rank == 0:
         print("Time INIT {:>7.2f} (s)".format(time.perf_counter() - t_INIT))
-    ### END Initialization for SCF ###
-    ### SCF cycle ###
-    mu0 = -5.5 # initial guess for chemical potential
+    # //Initialization for SCF finished.
+
+    # //Begin SCF Cycle.
+
+    # Initial chemical potential guess. TODO: This should probably not be
+    # hard-coded.
+    mu0 = -5.5
     if sdc.UHF:
         mu0 = np.array([mu0+0.1, mu0-0.1])
         mu0 = np.array([-1.3, -5.5])
 
+    # Iteration loop.
     for gsc in range(sdc.numAdaptIter):
+
         if rank == 0:
             print('\n\n|||| Adaptive iter:', gsc, '||||')
-            #print_memory_usage(rank, node_rank, "Memory usage")
         TIC_iter = time.perf_counter()
 
         tic = time.perf_counter()
-        if node_rank == 0: # broadcasts dm from root rank (assuming its rank 0 on node 0) to primary ranks on other nodes. E.g. for ranks arranged as {node0:[0,1,2,3] node1:[4,5,6,7]}, dm is broadcates from 0 to 4. One of the major bottlenecks.
+
+        # Broadcasts dm from root rank (assuming it is rank 0 on node 0) to
+        # primary ranks on other nodes. E.g. for ranks arranged as
+        # {node0:[0,1,2,3] node1:[4,5,6,7]}, dm is broadcast from 0 to 4.
+        # One of the major bottlenecks.
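get_mu() is not shown in this patch; the sketch below only illustrates the closed-shell idea behind the chemical-potential search seeded by the mu0 guess above, assuming Fermi-Dirac occupations weighted by the core weights and a temperature kT in the same units as the eigenvalues.

import numpy as np

def fermi(e, mu, kT):
    x = np.clip((e - mu) / kT, -60.0, 60.0)
    return 1.0 / (1.0 + np.exp(x))

def find_mu(evals, dvals, kT, nel_half, tol=1e-10):
    # Bisect on mu until the weighted occupations sum to the target count.
    lo, hi = evals.min() - 10.0, evals.max() + 10.0
    mu = 0.5 * (lo + hi)
    for _ in range(200):
        mu = 0.5 * (lo + hi)
        n = np.sum(dvals * fermi(evals, mu, kT))
        if abs(n - nel_half) < tol:
            break
        lo, hi = (mu, hi) if n < nel_half else (lo, mu)
    return mu

evals = np.array([-12.0, -9.0, -7.5, -1.0, 2.0])
dvals = np.ones_like(evals)            # pretend every state is fully "core"
print(find_mu(evals, dvals, kT=0.1, nel_half=3.0))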
+        if node_rank == 0:
             primary_comm.Bcast([P_contr.cpu().numpy(), MPI.DOUBLE], root=0)
-        if rank == 0: print("Time to bcast DM_cpu_np {:>7.2f} (s)".format(time.perf_counter() - tic), rank)
+
+        if rank == 0:
+            print("Time to bcast DM_cpu_np {:>7.2f} (s)"
+                  .format(time.perf_counter() - tic), rank)
+
         tic = time.perf_counter()
-        if gsc > 0: # lots of things have been done during initialization, so at iteration 0 we can proceed right to get_singlePoint
-            if node_rank == 0: # halos, dm contraction, and graphs are performed on primary ranks of each node and then broadcasted locally to other ranks
-                ### HALOS CALC ###
+
+        # Lots of things have been done during initialization,
+        # so at iteration 0 we can proceed right to get_singlePoint.
+        if gsc > 0:
+
+            # Halos, dm contraction, and graphs are performed on primary ranks
+            # of each node and then broadcast locally to other ranks.
+            if node_rank == 0:
+
+                # //Begin HALOS
                 tic = time.perf_counter()
                 partsCoreHalo = []
-                if rank == 0:print("Core and halos indices for every part:")
+
+                if rank == 0:
+                    print("Core and halos indices for every part:")
+
                 for i in range(sdc.nparts):
+
                     coreHalo, nc, nch = get_coreHaloIndices(parts[i], fullGraph, sdc.numJumps, eng=eng)
                     partsCoreHalo.append(coreHalo)
-                    if sdc.verb and rank == 0: print("coreHalo for part", i, "=", coreHalo)
-                    if rank == 0: print(' N atoms in core/coreHalo {:>6d} : {:>6d} {:>6d}'.format(i, len(parts[i]), len(coreHalo)))
-                if rank == 0: print("Time to compute halos {:>7.2f} (s)".format(time.perf_counter() - tic))
-                ### END HALOS CALC ###
+
+                    if sdc.verb and rank == 0:
+                        print("coreHalo for part", i, "=", coreHalo)
+
+                    if rank == 0:
+                        print(' N atoms in core/coreHalo {:>6d} : {:>6d} {:>6d}'
+                              .format(i, len(parts[i]), len(coreHalo)))
+
+                if rank == 0:
+                    print("Time to compute halos {:>7.2f} (s)"
+                          .format(time.perf_counter() - tic))
+                # //End HALOS
 
                 tic = time.perf_counter()
-                new_graph_for_pairs = get_ch_graph(sdc, sy, fullGraph, parts, partsCoreHalo) # Graph where new_graph_for_pairs[i] is a CH in which atom i is a core atom. new_graph_for_pairs[i][0] is the size of CH.
-                if rank == 0: print("Time to updt DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic))
+
+                # Graph where new_graph_for_pairs[i] is a CH in which atom i
+                # is a core atom. new_graph_for_pairs[i][0] is the size of CH.
+                new_graph_for_pairs = get_ch_graph(sdc,
+                                                   sy,
+                                                   fullGraph,
+                                                   parts,
+                                                   partsCoreHalo)
+
+                if rank == 0:
+                    print("Time to updt DM and mod graphs {:>7.2f} (s)"
+                          .format(time.perf_counter() - tic))
+
                 tic = time.perf_counter()
-                # torch.save(P_contr, 'P.pt')
-                # exit()
-                update_dm_contraction(sdc, sy, P_contr, graph_for_pairs, new_graph_for_pairs, device) # update dm contraction based on new_graph_for_pairs
-                graph_for_pairs = new_graph_for_pairs # reset graph_for_pairs
-                if rank == 0: print("Time to updt DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic))
+
+                # Update dm contraction based on new_graph_for_pairs.
+                update_dm_contraction(sdc,
+                                      sy,
+                                      P_contr,
+                                      graph_for_pairs,
+                                      new_graph_for_pairs,
+                                      device)
+
+                # Reset graph_for_pairs.
+                graph_for_pairs = new_graph_for_pairs
+                if rank == 0:
+                    print("Time to updt DM and mod graphs {:>7.2f} (s)"
+                          .format(time.perf_counter() - tic))
+
                 tic = time.perf_counter()
-                graph_maskd = get_maskd(sdc, sy, graph_for_pairs) # get diagonal mask of contracted dm
-                if rank == 0: print("Time to updt DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic))
+
+                # Diagonal mask of contracted density matrix.
+ graph_maskd = get_maskd(sdc, sy, graph_for_pairs) + + if rank == 0: + print("Time to updt DM and mod graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + else: coreHalo = None partsCoreHalo = None @@ -1391,48 +1662,97 @@ def get_adaptiveDM_PYSEQM(sdc, partsCoreHalo = node_comm.bcast(partsCoreHalo, root=0) graph_for_pairs = node_comm.bcast(graph_for_pairs, root=0) graph_maskd = node_comm.bcast(graph_maskd, root=0) - if node_rank == 0: print("Time to bcast DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic), rank) - + if node_rank == 0: + print("Time to bcast DM and mod graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic), rank) + tic = time.perf_counter() - # Single point part. For efficiency, the PySEQM dm needs to be reshaped in 4x4 blocks. - global_Eelec = np.zeros(1, dtype=np.float64) # this will sum electronic energy from CHs on ranks, giving total Eelec + + # Single point part. For efficiency, the PySEQM density matrix + # needs to be reshaped in 4x4 blocks. + + # This will sum electronic energy from CHs on ranks, giving total Eelec + global_Eelec = np.zeros(1, dtype=np.float64) + + # TODO: All of this is so PYSEQM specific already, this should probably + # be removed. if eng.interface == "PySEQM": with torch.no_grad(): - if node_rank < num_gpus: # This condition is for GPU jobs only because sometimes there are fewer GPUs per node than ranks per nodes. - # We want more ranks per node because dm update always happens on CPU, on node 0, in parallel. - eElec, eValOnRank_list, Q_list, NH_Nh_Hs_list, core_indices_in_sub_expanded_list, Nocc_list, mu0 = \ - get_singlePoint(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, rank, gpu_comm, parts, partsCoreHalo, sy, hindex, mu0, molecule_whole, - P_contr, graph_for_pairs, graph_maskd) + # This condition is for GPU jobs only because sometimes there + # are fewer GPUs per node than ranks per nodes. + + if node_rank < num_gpus: + # We want more ranks per node because dm update always + # happens on CPU, on node 0, in parallel. + (eElec, + eValOnRank_list, + Q_list, + NH_Nh_Hs_list, + core_indices_in_sub_expanded_list, + Nocc_list, mu0) = get_singlePoint(sdc, + eng, + partsPerGPU, + partsPerNode, + node_id, + node_rank, + rank, + gpu_comm, + parts, + partsCoreHalo, + sy, + hindex, + mu0, + molecule_whole, + P_contr, + graph_for_pairs, + graph_maskd) + gpu_comm.Allreduce(eElec, global_Eelec, op=MPI.SUM) else: - eElec, eValOnRank_list, Q_list, NH_Nh_Hs_list, core_indices_in_sub_expanded_list, Nocc_list, mu0 = 0, None, None, None, None, None, None + (eElec, + eValOnRank_list, + Q_list, + NH_Nh_Hs_list, + core_indices_in_sub_expanded_list, + Nocc_list, + mu0) = 0, None, None, None, None, None, None + comm.Barrier() + else: raise ValueError(f"ERROR!!!: Interface type not recognized: '{eng.interface}'. " + - f"Use any of the following: Module,File,Socket,MDI") - if gsc==0 and sdc.UHF: + f"Use any of the following: Module,File,Socket,MDI") + + if gsc == 0 and sdc.UHF: print('sym break') + for I in range(len(Q_list)): orb_idx = NH_Nh_Hs_list[I][3][0] Q_list[I][0,:,orb_idx] = 0.9*Q_list[I][0,:,orb_idx-1] + 0.1*Q_list[I][0,:,orb_idx] + sdc.EelecNew = global_Eelec[0] - #torch.cuda.synchronize() - if rank == 0: print("Time to get_singlePoint {:>7.2f} (s)".format(time.perf_counter() - tic)) - if sdc.restartLoad: # If True, these files will be read and used instead as default initial guess. 
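The UHF "sym break" step above mixes the alpha HOMO with the alpha HOMO-1 so the initial guess is not spin-restricted. A toy torch sketch; the shapes and the HOMO index are assumptions, and the mixed column is no longer exactly normalized.

import torch

norbs, homo = 6, 3                                    # assumed orbital count / HOMO index
Q = torch.linalg.qr(torch.randn(2, norbs, norbs)).Q   # fake alpha/beta eigenvectors

# Mix the alpha HOMO with the alpha HOMO-1 to break spin symmetry.
Q[0, :, homo] = 0.9 * Q[0, :, homo - 1] + 0.1 * Q[0, :, homo]
print(torch.linalg.norm(Q[0, :, homo]))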
+ if rank == 0: + print("Time to get_singlePoint {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + + # If True, these files will be read and used instead as default initial guess. + if sdc.restartLoad: sdc.restartLoad = False if node_rank == 0: P_contr[:] = torch.load('P_contr.pt') - with open('parts.pkl','rb') as f: + with open('parts.pkl', 'rb') as f: parts = pickle.load(f) - with open('partsCoreHalo.pkl','rb') as f: + with open('partsCoreHalo.pkl', 'rb') as f: partsCoreHalo = pickle.load(f) - with open('fullGraph.pkl','rb') as f: + with open('fullGraph.pkl', 'rb') as f: fullGraph = pickle.load(f) + mu0 = np.load('mu0.npy') graph_for_pairs = np.load('graph_for_pairs.npy') graph_maskd = np.load('graph_maskd.npy') + if rank == 0: eValOnRank_list = torch.load('eValOnRank_list.pt') Q_list = torch.load('Q_list.pt') @@ -1440,11 +1760,13 @@ def get_adaptiveDM_PYSEQM(sdc, core_indices_in_sub_expanded_list = torch.load('core_indices_in_sub_expanded_list.pt') Nocc_list = torch.load('Nocc_list.pt') - if rank == 0 and sdc.restartSave: # Save for future restart. Slows things down significantly. + # Save for future restart. Slows things down significantly. + if rank == 0 and sdc.restartSave: torch.save(eValOnRank_list, 'eValOnRank_list.pt') torch.save(Q_list, 'Q_list.pt') torch.save(NH_Nh_Hs_list, 'NH_Nh_Hs_list.pt') - torch.save(core_indices_in_sub_expanded_list, 'core_indices_in_sub_expanded_list.pt') + torch.save(core_indices_in_sub_expanded_list, + 'core_indices_in_sub_expanded_list.pt') torch.save(Nocc_list, 'Nocc_list.pt') torch.save(P_contr, 'P_contr.pt') with open('parts.pkl', 'wb') as f: @@ -1457,101 +1779,182 @@ def get_adaptiveDM_PYSEQM(sdc, np.save('graph_for_pairs', graph_for_pairs) np.save('graph_maskd', graph_maskd) - if rank == 0: # this defines what part of density matrix will be updated by each rank on node 0. + # This defines what part of density matrix will be + # updated by each rank on node 0. + if rank == 0: tic = time.perf_counter() partsPerRank = int(sdc.nparts / node_numranks) partIndex1 = 0 * partsPerRank partIndex2 = (0 + 1) * partsPerRank - Q_list_on_rank = Q_list[partIndex1:partIndex2] # Root rank processes its own part + # Root rank processes its own part + Q_list_on_rank = Q_list[partIndex1:partIndex2] + for r in range(1, node_numranks): partIndex1 = r * partsPerRank partIndex2 = (r + 1) * partsPerRank + # Send only the necessary slice to each rank node_comm.send(Q_list[partIndex1:partIndex2], dest=r, tag=0) - print("Time send Q_list slice {:>7.2f} (s)".format(time.perf_counter() - tic)) + + print("Time send Q_list slice {:>7.2f} (s)" + .format(time.perf_counter() - tic)) if rank < node_numranks and rank != 0: Q_list_on_rank = node_comm.recv(source=0, tag=0) if rank < node_numranks: tic = time.perf_counter() - ### BCAST DATA across ranks on node 0 ### + + # Broadcast data across ranks on node 0. 
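The restart block above mixes three serialization paths (torch tensors, pickled Python objects, and numpy arrays). A minimal sketch of the same save/load pairing, with illustrative file names:

import pickle
import numpy as np
import torch

dm = torch.zeros(3, 4)
graph = {"parts": [[0, 1], [2, 3]]}
mu0 = np.array([-5.5])

torch.save(dm, "dm.pt")
with open("graph.pkl", "wb") as f:
    pickle.dump(graph, f)
np.save("mu0", mu0)

dm = torch.load("dm.pt")
with open("graph.pkl", "rb") as f:
    graph = pickle.load(f)
mu0 = np.load("mu0.npy")
print(dm.shape, graph, mu0)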
eValOnRank_list = node_comm.bcast(eValOnRank_list, root=0) NH_Nh_Hs_list = node_comm.bcast(NH_Nh_Hs_list, root=0) core_indices_in_sub_expanded_list = node_comm.bcast(core_indices_in_sub_expanded_list, root=0) Nocc_list = node_comm.bcast(Nocc_list, root=0) mu0 = node_comm.bcast(mu0, root=0) node_comm.Barrier() - ### END BCAST DATA ### - with torch.no_grad(): # dm update and the graph from dm - fullGraphRho = get_singlePointDM(sdc, eng, rank, node_numranks, node_comm, parts, partsCoreHalo, sy, hindex, mu0, P_contr, graph_for_pairs, - eValOnRank_list, Q_list_on_rank, NH_Nh_Hs_list, core_indices_in_sub_expanded_list) - if rank == 0: print("Time to updt DM {:>7.2f} (s)".format(time.perf_counter() - tic)) + # Density matrix update and the graph from the DM. + with torch.no_grad(): + fullGraphRho = get_singlePointDM(sdc, + eng, + rank, + node_numranks, + node_comm, + parts, + partsCoreHalo, + sy, + hindex, + mu0, + P_contr, + graph_for_pairs, + eValOnRank_list, + Q_list_on_rank, + NH_Nh_Hs_list, + core_indices_in_sub_expanded_list) + + if rank == 0: + print("Time to updt DM {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + node_comm.Barrier() tic = time.perf_counter() - fullGraphRho_LIST = node_comm.gather(fullGraphRho, root=0) # get dm-derived graphs from each rank + + # Get graph derived from the density matrix (on each rank.) + fullGraphRho_LIST = node_comm.gather(fullGraphRho, + root=0) if rank == 0: - fullGraphRho_LIST.append(fullGraph) # adds the graph we got on the previous iteration. Note, when doing SCF, the graph keeps growing, no nodes from previous iterations are removed. - fullGraph = add_mult_graphs(fullGraphRho_LIST) # adds together the list of graphs - print("Time to add graphs {:>7.2f} (s)".format(time.perf_counter() - tic)) + + # Adds the graph we got on the previous iteration. NOTE:, when + # doing SCF, the graph keeps growing, no nodes from previous + # iterations are removed. + fullGraphRho_LIST.append(fullGraph) + + # Combines graphs from the Python List. + fullGraph = add_mult_graphs(fullGraphRho_LIST) + + print("Time to add graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + del fullGraphRho - + if rank == 0: tic = time.perf_counter() if sdc.UHF: - trace = torch.sum(P_contr.transpose(1,2).reshape(2, molecule_whole.molsize*(len(graph_for_pairs[0])-1), 4,4)[:, graph_maskd].diagonal(dim1=-2, dim2=-1), dim=(1,2)) - print("DM TRACE: {:>10.8f}, {:>10.8f}".format(trace[0], trace[1])) + trace = torch.sum(P_contr.transpose(1, 2) + .reshape(2, + molecule_whole.molsize*(len(graph_for_pairs[0])-1), + 4, + 4)[:, graph_maskd].diagonal(dim1=-2, dim2=-1), dim=(1, 2)) + + print("DM TRACE: {:>10.8f}, {:>10.8f}" + .format(trace[0], trace[1])) + else: - trace = torch.sum(P_contr.transpose(0,1).reshape(molecule_whole.molsize*(len(graph_for_pairs[0])-1), 4,4)[graph_maskd].diagonal(dim1=-2, dim2=-1)) + trace = torch.sum(P_contr.transpose(0, 1) + .reshape(molecule_whole.molsize*(len(graph_for_pairs[0])-1), + 4, + 4)[graph_maskd].diagonal(dim1=-2, dim2=-1)) + print("DM TRACE: {:>10.7f}".format(trace)) - print("Time to get trace {:>7.2f} (s)".format(time.perf_counter() - tic)) + + print("Time to get trace {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + else: fullGraph = None - + tic = time.perf_counter() - fullGraph = comm.bcast(fullGraph, root=0) # bcast the new graph across ALL ranks - if rank == 0: print("Time to bcast fullGraph {:>7.2f} (s)".format(time.perf_counter() - tic)) + + # Broadcast the new graph across ALL ranks. 
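The DM TRACE printout above is a sanity check: the trace of the density matrix should equal the electron count. A plain dense closed-shell example (instead of the contracted 4x4-block storage used here):

import numpy as np

nocc, norbs = 3, 8
C = np.linalg.qr(np.random.default_rng(1).normal(size=(norbs, norbs)))[0]
P = 2.0 * C[:, :nocc] @ C[:, :nocc].T     # closed-shell DM, occupation 2
print(np.trace(P))                        # -> 6.0, the number of electrons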
+ fullGraph = comm.bcast(fullGraph, root=0) + + if rank == 0: + print("Time to bcast fullGraph {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + del eValOnRank_list, Q_list, NH_Nh_Hs_list, Nocc_list + torch.cuda.empty_cache() + if rank == 0: + print("t Iter {:>8.2f} (s)" + .format(time.perf_counter() - TIC_iter)) + + # //SCF Cycle complete. + + # //Forces begin + tic_F_INIT = time.perf_counter() - # Function to calculate tensor size in megabytes (MB) - # if rank == 0: - # # Sort tensors by size and print them - # tensors = list(get_tensors()) - # tensors.sort(key=lambda x: tensor_size(x), reverse=True) - # print("Top memory-consuming tensors:") - # for tensor in tensors: - # if tensor_size(tensor) > 0.1: - # print(f"Tensor size: {tensor_size(tensor):.2f} MB | Shape: {tensor.shape} | Dtype: {tensor.dtype}") - if rank == 0: print("t Iter {:>8.2f} (s)".format(time.perf_counter() - TIC_iter)) - ### END SCF cycle ### - - ### FORCES CALC ### - tic_F_INIT = time.perf_counter() if node_rank < num_gpus: - if node_rank == 0: # broadcasts dm from root rank (assuming its rank 0 on node 0) to primary ranks on other nodes. E.g. for ranks arranged as {node0:[0,1,2,3] node1:[4,5,6,7]}, dm is broadcates from 0 to 4. One of the major bottlenecks. + + # Broadcast density matrix from root rank (assuming rank=node=0.) + # to primary ranks on other nodes. + if node_rank == 0: primary_comm.Bcast([P_contr.cpu().numpy(), MPI.DOUBLE], root=0) forces = np.zeros((sy.coords.shape)) partsCoreHalo = [] - if rank == 0: print("\nCore and halos indices for every part:") + if rank == 0: + print("\nCore and halos indices for every part:") + for i in range(sdc.nparts): - coreHalo, nc, nch = get_coreHaloIndices(parts[i], fullGraph, sdc.numJumps, eng=eng) + coreHalo, nc, nch = get_coreHaloIndices(parts[i], + fullGraph, + sdc.numJumps, + eng=eng) + partsCoreHalo.append(coreHalo) - if sdc.verb: print("coreHalo for part", i, "=", coreHalo) - if rank == 0: print('N atoms in core/coreHalo {:>6d} : {:>6d} {:>6d}'.format(i, len(parts[i]), len(coreHalo))) + if sdc.verb: + print("coreHalo for part", i, "=", coreHalo) + if rank == 0: + print('N atoms in core/coreHalo {:>6d} : {:>6d} {:>6d}' + .format(i, len(parts[i]), len(coreHalo))) tic = time.perf_counter() new_graph_for_pairs = get_ch_graph(sdc, sy, fullGraph, parts, partsCoreHalo) - if rank == 0: print("Time to updt DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic)) + if rank == 0: + print("Time to updt DM and mod graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + tic = time.perf_counter() - update_dm_contraction(sdc, sy, P_contr, graph_for_pairs, new_graph_for_pairs, device) + update_dm_contraction(sdc, + sy, + P_contr, + graph_for_pairs, + new_graph_for_pairs, + device) + graph_for_pairs = new_graph_for_pairs - if rank == 0: print("Time to updt DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic)) + if rank == 0: + print("Time to updt DM and mod graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + tic = time.perf_counter() graph_maskd = get_maskd(sdc, sy, graph_for_pairs) - if rank == 0: print("Time to updt DM and mod graphs {:>7.2f} (s)".format(time.perf_counter() - tic)) + + if rank == 0: + print("Time to updt DM and mod graphs {:>7.2f} (s)" + .format(time.perf_counter() - tic)) + else: forces = None partsCoreHalo = None @@ -1564,7 +1967,13 @@ def get_adaptiveDM_PYSEQM(sdc, else: device = 'cpu' - molecule_whole = get_molecule_pyseqm(sdc, sy.coords, sy.symbols, sy.types, do_large_tensors = sdc.use_pyseqm_lt, device=device)[0] + 
molecule_whole = get_molecule_pyseqm(sdc, + sy.coords, + sy.symbols, + sy.types, + do_large_tensors=sdc.use_pyseqm_lt, + device=device)[0] + forces = gpu_comm.bcast(forces, root=0) partsCoreHalo = gpu_comm.bcast(partsCoreHalo, root=0) gpu_comm.Barrier() @@ -1574,28 +1983,78 @@ def get_adaptiveDM_PYSEQM(sdc, if rank == 0: forces[:] = .0 gpu_comm.Barrier() - if rank == 0: print("Time init forces {:>8.2f} (s)".format(time.perf_counter() - tic_F_INIT)) + + if rank == 0: + print("Time init forces {:>8.2f} (s)" + .format(time.perf_counter() - tic_F_INIT)) tic = time.perf_counter() if eng.interface == "PySEQM": + if sdc.doForces: - eElec = get_singlePointForces(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, rank, parts, partsCoreHalo, sy, hindex, forces, molecule_whole, - None, P_contr.to(device), graph_for_pairs, graph_maskd) + eElec = get_singlePointForces(sdc, + eng, + partsPerGPU, + partsPerNode, + node_id, + node_rank, + rank, + parts, + partsCoreHalo, + sy, + hindex, + forces, + molecule_whole, + None, + P_contr.to(device), + graph_for_pairs, + graph_maskd) else: with torch.no_grad(): - eElec = get_singlePointForces(sdc, eng, partsPerGPU, partsPerNode, node_id, node_rank, rank, parts, partsCoreHalo, sy, hindex, forces, molecule_whole, - None, P_contr.to(device), graph_for_pairs, graph_maskd) + eElec = get_singlePointForces(sdc, + eng, + partsPerGPU, + partsPerNode, + node_id, + node_rank, + rank, + parts, + partsCoreHalo, + sy, + hindex, + forces, + molecule_whole, + None, + P_contr.to(device), + graph_for_pairs, + graph_maskd) + global_Eelec = np.zeros(1, dtype=np.float64) gpu_comm.Barrier() gpu_comm.Allreduce(MPI.IN_PLACE, forces, op=MPI.SUM) - gpu_comm.Allreduce(eElec, global_Eelec, op=MPI.SUM) #primary_comm + + # Primary communicator. + gpu_comm.Allreduce(eElec, global_Eelec, op=MPI.SUM) + if rank == 0: - print("Time to get electron forces {:>8.2f} (s)".format(time.perf_counter() - tic)) - print("eElec: {:>10.12f}".format(global_Eelec[0]),) - - # Nuclear energy and forces. For now, done on one cpu/gpu, for the whole system at once (pyseqm style). Hence, do_large_tensors = True. Needs to be fixed. + print("Time to get electron forces {:>8.2f} (s)" + .format(time.perf_counter() - tic)) + + print("eElec: {:>10.12f}" + .format(global_Eelec[0]),) + + # Nuclear energy and forces. For now, done on one cpu/gpu, + # for the whole system at once (pyseqm style). Hence, do_large_tensors + # = True. Needs to be fixed. if rank == 0: - molSysData = pyseqmObjects(sdc, sy.coords, sy.symbols, sy.types, do_large_tensors = True, device=device) #object with whatever initial parameters and tensors + # Object with whatever initial parameters and tensors. + molSysData = pyseqmObjects(sdc, + sy.coords, + sy.symbols, + sy.types, + do_large_tensors=True, + device=device) + tic = time.perf_counter() eNucAB = get_eNuc(eng, molSysData) eTot, eNuc = get_eTot(eng, molSysData, eNucAB, 0) @@ -1606,67 +2065,137 @@ def get_adaptiveDM_PYSEQM(sdc, molSysData.molecule_whole.coordinates.grad.zero_() print("Time to get nuclear forces {:>8.2f} (s)".format(time.perf_counter() - tic)) np.save('forces', (forces+forceNuc.cpu().numpy()[0]), ) - ### END FORCES CALC ### + # //Forces finished. + + +def get_adaptiveDM(sdc, + eng, + comm, + rank, + numranks, + sy, + hindex, + graphNL): + """ -def get_adaptiveDM(sdc, eng, comm, rank, numranks, sy, hindex, graphNL): + Gets the adaptive denisty matrix at the current iteration. - if eng.interface=="PySEQM": + Parameters + ---------- + sdc: + The SEDACS driver. 
+ eng: + The SEDACS Engine. + comm: + The MPI communicator. + rank: + The MPI rank. + numranks: + Number of global MPI ranks. + sy: + The full system SEDACS System object. + hindex: + Atom-wise orbital indices. + graphNL: + The thresholded graph neighborlist. + + Returns + ------- + fullGraph: + The full graph for the entire system. + charges: + Charges for the atoms in the system. + parts: + The core partitions. + subSysOnRank: + Rank indices for a given subsystem. + """ + + if eng.interface == "PySEQM": get_adaptiveDM_PYSEQM(sdc, eng, comm, rank, numranks, sy, hindex, graphNL) - return + return fullGraph = graphNL - #Iitial guess for the excess ocupation vector. This is the negative of - #the charge! - charges = np.zeros(sy.nats) - chargesOld = np.zeros(sy.nats) + # Initial guess for the excess ocupation vector. This is the negative of + # the charge! + charges = np.zeros(sy.nats) + chargesOld = np.zeros(sy.nats) + + # TODO: Remove? chargesIn = None chargesOld = None chargesOut = None + + # Loop over number of iterations. for gscf in range(sdc.numAdaptIter): msg = "Graph-adaptive iteration" + str(gscf) - status_at("get_adaptiveSCFDM",msg) + status_at("get_adaptiveSCFDM", msg) + # Partition the graph - parts = graph_partition(fullGraph, sdc.partitionType, sdc.nparts, False) + parts = graph_partition(fullGraph, + sdc.partitionType, + sdc.nparts, + False) + njumps = 1 partsCoreHalo = [] numCores = [] print("\nCore and halos indices for every part:") + for i in range(sdc.nparts): - coreHalo, nc, nh = get_coreHaloIndices(parts[i], fullGraph, njumps) + coreHalo, nc, nh = get_coreHaloIndices(parts[i], + fullGraph, + njumps) + partsCoreHalo.append(coreHalo) numCores.append(nc) print("coreHalo for part", i, "=", coreHalo) - - fullGraphRho,charges,subSysOnRank = get_singlePoint_charges(sdc, eng, rank, numranks, comm, parts, partsCoreHalo, sy, hindex,gscf) - - print("Collected charges",charges) + (fullGraphRho, + charges, + subSysOnRank) = get_singlePoint_charges(sdc, + eng, + rank, + numranks, + comm, + parts, + partsCoreHalo, + sy, + hindex, + gscf) + + print("Collected charges", charges) fullGraph = add_graphs(fullGraphRho, graphNL) for i in range(sy.nats): - print("Charges:",i,sy.symbols[sy.types[i]],charges[i]) - print("SCF ERR =",scfError) - print("TotalCharge",sum(charges)) - - if(scfError < sdc.scfTol): - status_at("get_adaptiveSCFDM","SCF converged with SCF error = "+str(scfError)) + print("Charges:", i, sy.symbols[sy.types[i]], charges[i]) + + # FIXME: scfError where? + print("SCF ERR =", scfError) + print("TotalCharge", sum(charges)) + + if (scfError < sdc.scfTol): + status_at("get_adaptiveSCFDM", + "SCF converged with SCF error = "+str(scfError)) break - if(gscf == sdc.numAdaptIter - 1): - warning_at("get_adaptiveSCFDM","SCF did not converged ... ") + if (gscf == sdc.numAdaptIter - 1): + warning_at("get_adaptiveSCFDM", + "SCF did not converged ... 
") AtToPrint = 0 subSy = System(fullGraphRho[AtToPrint, 0]) subSy.symbols = sy.symbols - subSy.coords, subSy.types = extract_subsystem( - sy.coords, sy.types, sy.symbols, fullGraph[AtToPrint, 1 : fullGraph[AtToPrint, 0] + 1] - ) + subSy.coords, subSy.types = extract_subsystem(sy.coords, + sy.types, + sy.symbols, + fullGraph[AtToPrint, 1 : fullGraph[AtToPrint, 0] + 1]) if rank == 0: write_pdb_coordinates("subSyG_fin.pdb", subSy.coords, subSy.types, subSy.symbols) write_xyz_coordinates("subSyG_fin.xyz", subSy.coords, subSy.types, subSy.symbols) - return fullGraph,charges,parts,subSysOnRank + return fullGraph, charges, parts, subSysOnRank From b0473ae0f0970856cfa4ea84ad7c6cad990c19c9 Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 29 May 2025 15:23:41 -0600 Subject: [PATCH 5/5] LATTE and PYSEQM interface conflicts resolved --- src/sedacs/interface/pyseqm.py | 10 ++++++++-- src/sedacs/interface_modules.py | 12 ++++++++---- src/sedacs/mol_sys_data.py | 1 + src/sedacs/system.py | 4 +++- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/sedacs/interface/pyseqm.py b/src/sedacs/interface/pyseqm.py index 9aa0aff..dc7a355 100644 --- a/src/sedacs/interface/pyseqm.py +++ b/src/sedacs/interface/pyseqm.py @@ -4,6 +4,9 @@ from sedacs.system import get_hindex import sys +from sedacs.types import ArrayLike +from typing import Union + from sedacs.types import ArrayLike from sedacs.parser import Input import torch @@ -125,9 +128,9 @@ def get_coreHalo_ham_inds(partIndex: ArrayLike, core_indices_in_sub_expanded = core_indices_in_sub_expanded.flatten() norbs, norbs_for_every_type, hindex_sub, numel, numel_for_every_type = get_hindex(sdc.orbs, - sdc.valency, sy.symbols, - subSy.types) + subSy.types, + valency=sdc.valency) hindex_sub = torch.from_numpy(hindex_sub).to(device, dtype=sdc.torch_int_dt) return core_indices_in_sub, core_indices_in_sub_expanded, hindex_sub @@ -1016,3 +1019,6 @@ def get_diag_guess_pyseqm(molecule: Molecule, P0[molecule.Z==1,0,0] = 1.0 return P0 + + + diff --git a/src/sedacs/interface_modules.py b/src/sedacs/interface_modules.py index 2dd051d..ea36f62 100644 --- a/src/sedacs/interface_modules.py +++ b/src/sedacs/interface_modules.py @@ -34,13 +34,17 @@ error_at("interface_modules", "No specific fortran or python library exists") raise e + +# TODO These proxies should at some point probably be more properly +# integrated into the code. 
try: - from proxies.python.hamiltonian import get_hamiltonian_proxy - from proxies.python.density_matrix import get_density_matrix_proxy - #from proxies.python.evals_dvals import get_evals_dvals_proxy + from proxies.python.first_level import get_hamiltonian_proxy + from proxies.python.first_level import get_density_matrix_proxy + import inspect + print(inspect.getfile(get_density_matrix_proxy)) from proxies.python.init_proxy import init_proxy_proxy - from proxies.python.hamiltonian import build_coul_ham_proxy + from proxies.python.energy_and_forces import build_coul_ham_proxy except Exception as e: pythlib = None raise e diff --git a/src/sedacs/mol_sys_data.py b/src/sedacs/mol_sys_data.py index 6f520e0..8e5b18a 100755 --- a/src/sedacs/mol_sys_data.py +++ b/src/sedacs/mol_sys_data.py @@ -6,6 +6,7 @@ import sys +from sedacs.types import ArrayLike from sedacs.interface.pyseqm import pyseqmObjects from sedacs.engine import Engine import numpy as np diff --git a/src/sedacs/system.py b/src/sedacs/system.py index cfa1604..fc0ad25 100644 --- a/src/sedacs/system.py +++ b/src/sedacs/system.py @@ -1207,7 +1207,8 @@ def get_neighs_of(i, coords, neighbox, boxOfI, inbox, latticeVectors): # @return hindex Orbital index for each atom in the system # @retunn numel Total number of electrons # -def get_hindex(orbs_for_every_symbol, symbols, types, verb=False): +# def get_hindex(orbs_for_every_symbol, valency, symbols, types, verb=False): +def get_hindex(orbs_for_every_symbol, symbols, types, valency=None, verb=False): nats = len(types[:]) ntypes = len(symbols) hindex = np.zeros((nats + 1), dtype=int) @@ -1241,6 +1242,7 @@ def get_hindex(orbs_for_every_symbol, symbols, types, verb=False): numel_for_atom = ptable.numel[atomic_number] norbs_for_every_type[cnt] = norbs_for_atom numel_for_every_type[cnt] = numel_for_atom + print(verb) if verb: print("type,symb,orb,valence",cnt, symbol,norbs_for_atom,numel_for_atom) cnt += 1
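The system.py change keeps get_hindex() backward compatible by adding valency as a defaulted keyword argument. A stub-level sketch of why both the legacy call and the new PYSEQM-style call keep working; the body below is a placeholder, not the real implementation.

def get_hindex(orbs_for_every_symbol, symbols, types, valency=None, verb=False):
    if valency is None:
        valency = []        # hypothetical fallback when no valency list is given
    return orbs_for_every_symbol, symbols, types, valency, verb

get_hindex(["s", "sp"], ["H", "O"], [0, 1])                   # legacy 3-argument call
get_hindex(["s", "sp"], ["H", "O"], [0, 1], valency=[1, 2])   # new keyword-style call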