Debugging error_2

lcmd-epfl · May 6, 2024 · 21e384b · 21e384b
1 parent 1df77cd
commit 21e384b
Show file tree

Hide file tree

Showing 39 changed files with 10,344 additions and 122 deletions.
diff --git a/cell2mol/c2m_driver.py b/cell2mol/c2m_driver.py
@@ -108,7 +108,37 @@
     print(f"ENTERING cell2mol with debug={debug}")
     cell = cell2mol(newcell, reconstruction=True, charge_assignment=True, spin_assignment=True, debug=debug)
     cell.assess_errors()
+    print("*** Cell ***")
     cell.save(cell_fname)
+    print(cell)
+    print("*** Reference molecules ***")
+    print(cell.refmoleclist)
+    print("*** Molecules ***")
+    for idx, mol in enumerate(cell.moleclist):
+        if mol.iscomplex:
+            print(f"{idx}: {mol.subtype}({mol.type}) {mol.formula} {mol.is_haptic=} {mol.totcharge=} {mol.spin=}") #\n   {mol.adjnum=}\n   {mol.madjnum=} \n   {mol.smiles=}")
+            # print(mol.adjnum)
+            for lig in mol.ligands:
+                print(f"|- {lig.subtype}({lig.type}) {lig.formula} {lig.is_haptic=} {lig.denticity=} {lig.totcharge=}")# \n   {lig.smiles=}")
+                # print(f"|- {lig.connected_idx}")
+                # print(lig.groups)
+                for group in lig.groups:
+                    print(f"|-- {group.subtype} ({group.type}) {group.formula} {group.is_haptic=} {group.denticity=} {group.closest_metal.label}")
+                    for met in group.metals:
+                        print(f"|--- {met.label} {met.mconnec=}")
+                print("")
+            for metal in mol.metals:
+                print(f"|# {metal.subtype}({metal.type}) {metal.label} {metal.coord_nr=} {metal.coord_geometry} {metal.charge=} {metal.spin=} {metal.coord_sphere_formula} {metal.mconnec=} {metal.connec=}")
+                # print(f"|# {metal.get_coord_sphere_formula()}")
+                # print(f"|# {metal.coord_sphere_formula}")
+                # print(f"|# {metal.mconnec=} {metal.connec=}")
+                # print(metal.metal_adjacency)
+                # for bond in metal.bonds:
+                    # print(f"|--- {bond}")
+        else:
+            print(f"{idx}: {mol.subtype}({mol.type}) {mol.formula} {mol.totcharge=} {mol.spin=}\n  {mol.smiles}")
+        print("")
+
     output.close()
     sys.stdout = stdout
 

diff --git a/cell2mol/cell_reconstruction.py b/cell2mol/cell_reconstruction.py
@@ -196,7 +196,7 @@ def fragments_reconstruct(moleclist: list, fraglist: list, Hlist: list, refmolec
         newmols, remfrag = sequential(fraglist, refmoleclist, cellvec, factor, metal_factor, "Heavy", debug)
         print(f"FRAG_RECONSTRUCT. {len(newmols)} molecules and {len(remfrag)} fragments out of SEQUENTIAL with Heavy")
         moleclist.extend(newmols)
-
+        print(f"FRAG_RECONSTRUCT. {remfrag=}")
         # After the first step, fraglist is made of the remaining molecules in the first step, and the list of H atoms
         fraglist = []
         fraglist.extend(remfrag)
@@ -210,16 +210,16 @@ def fragments_reconstruct(moleclist: list, fraglist: list, Hlist: list, refmolec
             for frag in fraglist:
                 print("FRAG_RECONSTRUCT.", frag.formula, frag.subtype, frag.labels)
 
-        finalmols, remfrag = sequential(fraglist, refmoleclist, cellvec, factor, metal_factor, "All", debug)
+        finalmols, remfrag = sequential(fraglist, refmoleclist, cellvec, factor, metal_factor, "All", debug=2)
         moleclist.extend(finalmols)
         print(f"FRAG_RECONSTRUCT. {moleclist=}")
         print(f"FRAG_RECONSTRUCT. {remfrag=}")
-        # if len(remfrag) > 0:
-        #     for i, mol in enumerate(moleclist):
-        #         writexyz(os.getcwd(), f"moleclist_{i}.xyz", mol.labels, mol.coord)
+        if len(remfrag) > 0:
+            for i, mol in enumerate(moleclist):
+                writexyz(os.getcwd(), f"moleclist_{i}.xyz", mol.labels, mol.coord)
 
-        #     for i, rem in enumerate(remfrag):
-        #         writexyz(os.getcwd(), f"remfrag_{i}.xyz", rem.labels, rem.coord)
+            for i, rem in enumerate(remfrag):
+                writexyz(os.getcwd(), f"remfrag_{i}.xyz", rem.labels, rem.coord)
         if len(remfrag) > 0:        Warning = True;  print("FRAG_RECONSTRUCT. Remaining after Hydrogen reconstruction",remfrag)
         elif len(moleclist) == 0:   Warning = True; print("FRAG_RECONSTRUCT. No Molecules after Hydrogen reconstruction", moleclist)
         else:                       Warning = False; print("FRAG_RECONSTRUCT. No remaining Molecules after Hydrogen reconstruction")
@@ -253,7 +253,7 @@ def assign_subtype(mol: object, references: list) -> str:
     else:                  return "Other"
 
 #######################################################
-def sequential(fragmentlist: list, refmoleclist: list, cellvec: list, factor: float=1.3, metal_factor: float=1.0, typ: str="All", debug: int=1):
+def sequential(fragmentlist: list, refmoleclist: list, cellvec: list, factor: float=1.3, metal_factor: float=1.0, typ: str="All", debug: int=2):
     # Crappy function that controls the reconstruction process. It is called sequential because pairs of fragments are sent one by one. Ideally, a parallel version would be desirable.
     # Given a list of fragments(fragmentlist), a list of reference molecules(refmoleclist), and some other minor parameters, the function sends pairs of fragments and evaluates if they...
     # ...form a bigger fragment. If so, the bigger fragment is evaluated. If it coincides with one of the molecules in refmoleclist, than it means that it is a full molecule that requires no further work.
@@ -380,7 +380,10 @@ def sequential(fragmentlist: list, refmoleclist: list, cellvec: list, factor: fl
             #  Here, the function "combine" is called. It will try cell translations of one fragment, and check whether it eventually combines with the second fragment into either a bigger fragment or a molecule
             #################
             goodlist, avglist, badlist = combine(sublist, refmoleclist, cellvec, threshold_tmat, factor, metal_factor, debug=debug)
-
+            if debug >=2 :
+                print("SEQUENTIAL: goodlist", len(goodlist), [g.formula for g in goodlist])
+                print("SEQUENTIAL: avglist", len(avglist), [a.formula for a in avglist])
+                print("SEQUENTIAL: badlist", len(badlist), [b.formula for b in badlist])
             #################
             #  This part handles the results of combine
             #################
@@ -522,7 +525,7 @@ def combine(tobemerged: list, references: list, cellvec: list, threshold_tmat: f
                             lig.get_denticity(debug=debug)
                         for met in reordered_newmolec.metals:                         
                             met.get_coordination_geometry(debug=debug) 
-                    if debug >= 1: print(f"COMBINE: {reordered_newmolec.fomula=}")
+                    if debug >= 1: print(f"COMBINE: {reordered_newmolec.formula=}")
                     if debug >= 1: print(f"COMBINE: {reordered_newmolec=}")
 
                     issame = compare_species(reordered_newmolec, ref, debug=debug)

diff --git a/cell2mol/classes.py b/cell2mol/classes.py
@@ -1454,9 +1454,9 @@ def reconstruct(self, cov_factor: float=None, metal_factor: float=None, debug: i
         for f in fragments:
             if not hasattr(f,"frac_coord"):       f.get_fractional_coord(self.cellvec)
         molecules, fragments, hydrogens = classify_fragments(fragments, self.refmoleclist, debug=debug)
-        if debug > 0: print(f"CELL.RECONSTRUCT: {molecules=}")
-        if debug > 0: print(f"CELL.RECONSTRUCT: {fragments=}")
-        if debug > 0: print(f"CELL.RECONSTRUCT: {hydrogens=}")
+        if debug > 0: print(f"CELL.RECONSTRUCT: {len(molecules)} {molecules=}")
+        if debug > 0: print(f"CELL.RECONSTRUCT: {len(fragments)} {fragments=}")
+        if debug > 0: print(f"CELL.RECONSTRUCT: {len(hydrogens)} {hydrogens=}")
 
         ## Determines if Reconstruction is necessary
         if len(fragments) > 0 or len(hydrogens) > 0: self.is_fragmented = True

diff --git a/cell2mol/connectivity.py b/cell2mol/connectivity.py
@@ -7,6 +7,7 @@
 from cell2mol.other import inv, extract_from_list
 from cell2mol.elementdata import ElementData
 from cell2mol.read_write import writexyz
+import os
 elemdatabase = ElementData()
 
 #######################################################
@@ -219,7 +220,9 @@ def get_adjmatrix(labels: list, pos: list, cov_factor: float=1.3, radii="default
                 b = np.array(pos[j])
                 dist = np.linalg.norm(a - b)
                 if (elemdatabase.elementgroup[labels[i]] == 1 or elemdatabase.elementgroup[labels[j]] == 1 ) and (labels[i] != "H" and labels[j] != "H"):
-                    cov_factor = 0.6
+                    cov_factor = 1.05
+                elif (elemdatabase.elementgroup[labels[i]] == 1 and elemdatabase.elementgroup[labels[j]] == 1 ) and (labels[i] == "H" or labels[j] == "H"):
+                    cov_factor = 1.05
                 else :
                     cov_factor = 1.3
                 thres = (radii[i] + radii[j]) * cov_factor
@@ -421,6 +424,7 @@ def compare_species(mol1, mol2, check_coordinates: bool=False, debug: int=0):
         print(mol1.formula)
         print(mol2.formula)
 
+
     # a pair of species is compared on the basis of:
     # 1) the total number of atoms
     if (mol1.natoms != mol2.natoms): 
@@ -439,15 +443,15 @@ def compare_species(mol1, mol2, check_coordinates: bool=False, debug: int=0):
         if elem != mol2.element_count[kdx]: 
             if debug > 0: print(f"COMPARE_SPECIES. FALSE, different {elem} count:")
             return False       
-
+    # writexyz(os.getcwd(), f"reordered.xyz", mol1.labels, mol1.coord)
     # 4) the number of adjacencies between each pair of element types
     if not hasattr(mol1,"adj_types"):     mol1.set_adj_types()
     if not hasattr(mol2,"adj_types"):     mol2.set_adj_types()
     if debug == 2: print(f"{mol1.adj_types=}")
     if debug == 2: print(f"{mol2.adj_types=}")
 
     count = 0
-    # if debug > 0: print("COMPARE_SPECIES. kdx ldx elem1 - elem2 : reordered - reference")
+    if debug > 0: print("COMPARE_SPECIES. kdx ldx elem1 - elem2 : reordered - reference")
     for kdx, (elem, row1) in enumerate(zip(elems, mol1.adj_types)):
         for ldx, (elem2, val1) in enumerate(zip(elems, row1)):
             val2 = mol2.adj_types[kdx, ldx]

diff --git a/cell2mol/missingH.py b/cell2mol/missingH.py
@@ -129,6 +129,9 @@ def check_missingH(refmoleclist: list, debug: int=0):
                             bonded_atom_labels.append(ref.atoms[adj].label)
                         print("Adjacency", a.adjacency, bonded_atom_labels)
                         ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord)
+                        if ismissingH:
+                            for label, coord in zip(bonded_atom_labels, bonded_atom_coord):
+                                print("Dist", f"{a.label}-{label}", get_dist(a.coord, coord))
                         if ismissingH:
                             if debug >= 2: print("")
                             if debug >= 2: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {ref.labels}")