Reset Adjacency Matrix for alkali and alkaline earth metals

lcmd-epfl · Jan 4, 2025 · abf8779 · abf8779
1 parent 6a9905e
commit abf8779
Show file tree

Hide file tree

Showing 6 changed files with 148 additions and 52 deletions.
diff --git a/cell2mol/charge_assignment.py b/cell2mol/charge_assignment.py
@@ -179,7 +179,17 @@ def select_charge_distr(charge_states: list, debug: int=0) -> list:
             if debug >= 2: print(f"    NEW SELECT FUNCTION: Case 2, more than one entry for {tgt_charge} in tmplist. Taking first")
 
     return good_states
-
+#######################################################
+def get_empty_protonation_state (specie: object, debug: int=2) -> list:
+    if debug >= 2: print(f"\nPOSCHARGE: doing empty PROTONATION for this specie {specie.formula} ({specie.subtype})")
+    #empty_list = list([np.zeros((len(specie.labels)))])
+    empty_list = []
+    for i in range(len(specie.labels)):
+        empty_list.append(int(0))
+    empty_protonation = protonation(specie.labels, specie.coord, specie.cov_factor, int(0), empty_list, empty_list, empty_list, empty_list, typ="Empty", parent=specie)
+    if debug >= 2: print("    CREATED EMPTY PROTONATION", empty_protonation)
+
+    return list([empty_protonation])
 #######################################################
 def get_protonation_states_specie(specie: object, debug: int=0) -> list:
     ##############################

diff --git a/cell2mol/classes.py b/cell2mol/classes.py
@@ -6,7 +6,7 @@
 from cell2mol.cell_reconstruction import classify_fragments, fragments_reconstruct
 from cell2mol.cell_operations import cart2frac, frac2cart_fromparam
 
-from cell2mol.charge_assignment import get_protonation_states_specie, get_possible_charge_state, get_metal_poscharges
+from cell2mol.charge_assignment import get_protonation_states_specie, get_possible_charge_state, get_metal_poscharges, get_empty_protonation_state
 from cell2mol.charge_assignment import prepare_unresolved, prepare_mols, correct_smiles_ligand
 
 from cell2mol.new_charge_assignment import set_charge_state, prepare_mol, balance_charge, assign_charge_to_specie
@@ -1357,9 +1357,12 @@ def get_unique_species(self, debug: int=0):
     #######################################################
     def check_missing_H(self, debug: int=0):
         from cell2mol.missingH import check_missingH
-        Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater = check_missingH(self.refmoleclist, debug=debug)
-        if ismissingH or Missing_H_in_C or Missing_H_in_CoordWater: self.has_missing_H = True
-        else:                                                       self.has_missing_H = False
+        Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater, Missing_H_in_Water = check_missingH(self.refmoleclist, debug=debug)
+        print(f"CELL.Check_missing_H: {Missing_H_in_C=} {Missing_H_in_CoordWater=} {Missing_H_in_Water=}")
+        if ismissingH or Missing_H_in_C or Missing_H_in_CoordWater or Missing_H_in_Water : 
+            self.has_missing_H = True
+        else:                                                       
+            self.has_missing_H = False
         return self.has_missing_H 
 
     #######################################################
@@ -1617,6 +1620,43 @@ def get_selected_cs(self, debug: int=0):
                 self.selected_cs.append(list([cs.corr_total_charge for cs in specie.possible_cs]))
             else :
                 self.selected_cs.append(specie.possible_cs)
+
+        #     if unique_specie.subtype == "metal":
+        #         self.selected_cs.append(unique_specie.possible_cs)
+        #     else:
+        #         if tmp is not None:
+        #             self.selected_cs.append(list([cs.corr_total_charge for cs in unique_specie.possible_cs]))
+        #         else:
+        #             if unique_specie.subtype == "ligand" :
+        #                 unique_specie.protonation_states = get_empty_protonation_state(unique_specie)
+        #                 tmp_2 = unique_specie.get_possible_cs(debug=debug)
+        #                 if tmp_2 is None:
+        #                     self.selected_cs.append(None)
+        #                 else:
+        #                     self.selected_cs.append(list([cs.corr_total_charge for cs in unique_specie.possible_cs]))
+        #             else:
+        #                 self.selected_cs.append(None)
+
+
+        # for specie in self.species_list:
+        #     print("Get possible charge states for species list", specie.formula)
+        #     tmp = specie.get_possible_cs(debug=debug)
+        #     if specie.subtype == "metal":
+        #         self.selected_cs.append(specie.possible_cs)
+        #     else:
+        #         if tmp is not None:
+        #             self.selected_cs.append(list([cs.corr_total_charge for cs in specie.possible_cs]))
+        #         else:
+        #             if specie.subtype == "ligand" :
+        #                 specie.protonation_states = get_empty_protonation_state(specie)
+        #                 tmp_2 = specie.get_possible_cs(debug=debug)
+        #                 if tmp_2 is None:
+        #                     self.selected_cs.append(None)
+        #                 else:
+        #                     self.selected_cs.append(list([cs.corr_total_charge for cs in specie.possible_cs]))
+        #             else:
+        #                 self.selected_cs.append(None)            
+
 
         if None in self.selected_cs:
             self.error_get_poscharges = True

diff --git a/cell2mol/connectivity.py b/cell2mol/connectivity.py
@@ -154,6 +154,17 @@ def get_metal_idxs(labels: list, debug: int=0):
         if (elemdatabase.elementblock[l] == 'd' or elemdatabase.elementblock[l] == 'f'): metal_indices.append(idx)
     return metal_indices
 
+################################
+def get_alkali_alkaline_earth_metal_idxs(labels: list, debug: int=0):
+    """ alkali metals (Group 1) and alkaline earth metals (Group 2)  """
+    alkali_alkaline_earth_metal_indices = []
+    for idx, l in enumerate(labels):
+        if elemdatabase.elementgroup[l]==1 and l != "H" and l != "D":
+            alkali_alkaline_earth_metal_indices.append(idx)
+        elif elemdatabase.elementgroup[l]==2 :
+            alkali_alkaline_earth_metal_indices.append(idx)
+    return alkali_alkaline_earth_metal_indices
+
 ################################
 def get_metal_species(labels: list):
     from cell2mol.elementdata import ElementData
@@ -232,16 +243,11 @@ def get_adjmatrix(labels: list, pos: list, cov_factor: float=1.3, radii="default
     # Creates Adjacency Matrix
     for i in range(0, natoms - 1):
         for j in range(i, natoms):
-            if i != j:
+            if i != j:                
                 a = np.array(pos[i])
                 b = np.array(pos[j])
                 dist = np.linalg.norm(a - b)
-                # if (elemdatabase.elementgroup[labels[i]] == 1 or elemdatabase.elementgroup[labels[j]] == 1 ) and (labels[i] != "H" and labels[j] != "H"):
-                #     cov_factor = 1.05
-                # elif (elemdatabase.elementgroup[labels[i]] == 1 and elemdatabase.elementgroup[labels[j]] == 1 ) and (labels[i] == "H" or labels[j] == "H"):
-                #     cov_factor = 1.05
-                # else :
-                #     cov_factor = 1.3
+
                 thres = (radii[i] + radii[j]) * cov_factor
                 if thres - (radii[i] + radii[j]) > 0.8:
                     thres = (radii[i] + radii[j]) + add_factor
@@ -260,6 +266,18 @@ def get_adjmatrix(labels: list, pos: list, cov_factor: float=1.3, radii="default
                         or elemdatabase.elementblock[labels[j]] == "f"):
                             adjmat[i, j] = 1
                             adjmat[j, i] = 1
+
+    # Set Adjacency Matrix as zeros for alkali and alkaline earth metals
+    for i in range(0, natoms - 1):
+        for j in range(i, natoms):
+            if i != j:
+                if (elemdatabase.elementgroup[labels[i]] == 2 or elemdatabase.elementgroup[labels[j]] == 2) :  
+                    adjmat[i, j] = 0
+                    adjmat[j, i] = 0   
+                elif (labels[i] != "H" and labels[j] != "H") and (labels[i] != "D" and labels[j] != "D"): 
+                    if (elemdatabase.elementgroup[labels[i]] == 1 or elemdatabase.elementgroup[labels[j]] == 1 ):     
+                        adjmat[i, j] = 0
+                        adjmat[j, i] = 0  
 
     # Sums the adjacencies of each atom to obtain "adjnum" 
     for i in range(0, natoms):

diff --git a/cell2mol/missingH.py b/cell2mol/missingH.py
@@ -60,7 +60,9 @@ def get_missingH_from_adjacency(Z, center, points, bonded_atom_labels):
             missingH = True
     elif val_e == shapeval :  
         missingH = False
-    else :
+    elif val_e > shapeval:  # carbon has more than 4 bonds       
+        missingH = False
+    else : # val_e < shapeval
         missingH = True
 
     # Saves report
@@ -108,12 +110,13 @@ def find_shape(vecs):
 def check_missingH(refmoleclist: list, debug: int=0):
 
     Missing_H_in_C = False
+    Missing_H_in_Water = False
     Missing_H_in_CoordWater = False
     ismissingH = False
     Warning = False
 
     # List of Metal Atoms for which O atoms might appear connected directly.
-    Exceptions_for_CoordWater = ["Re", "V", "Mo", "W", "Fe"]
+    Exceptions_for_CoordWater = ["Re", "V", "Mo", "W", "Fe", "Tc"]
 
     if debug >= 1: print("")
     if debug >= 1: print("##################")
@@ -122,7 +125,8 @@ def check_missingH(refmoleclist: list, debug: int=0):
     for idx, ref in enumerate(refmoleclist):
         if not ref.iscomplex:
             if ref.natoms == 1 and "O" in ref.labels: 
-                Missing_H_in_CoordWater = True
+                Missing_H_in_Water = True
+                # Missing_H_in_CoordWater = True
                 if debug >= 1: print(f"WARNING found isolated O atom in the cell. This tends to be a water with missing H, so stopping")
             elif ref.formula == "CO" or ref.formula == "CN":
                 pass
@@ -154,7 +158,7 @@ def check_missingH(refmoleclist: list, debug: int=0):
                     else:
                         Missing_H_in_CoordWater = True
                         if debug >= 1: print("")
-                        if debug >= 1: print("WARNING in Missing H function for ligand",lig.natoms,lig.labels)
+                        if debug >= 1: print("WARNING in Missing H function for ligand", lig.natoms, lig.labels)
                 elif lig.formula == "CO" or lig.formula == "CN":
                     pass
                 else:
@@ -169,14 +173,15 @@ def check_missingH(refmoleclist: list, debug: int=0):
                             if debug >= 2: print("Adjacency", a.adjacency, bonded_atom_labels)
                             ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord, bonded_atom_labels)
                             if ismissingH:
+                                print(a.label, a.mconnec, a.coord)
                                 if debug >= 1: print("")
                                 if debug >= 1: print(f"WARNING in Missing H function for: {ref.type}, molecule index {idx}, ligand index {jdx}, {lig.formula}")
                                 if debug >= 1: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms")
                                 if debug >= 1: print(report)
                                 Missing_H_in_C = True
 
-    if Missing_H_in_C or Missing_H_in_CoordWater:  Warning = True
+    if Missing_H_in_C or Missing_H_in_CoordWater or Missing_H_in_Water :  Warning = True
     if not Warning:
         if debug >= 1: print("Not a Single Molecule has Missing H atoms (apparently)")
 
-    return Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater
+    return Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater, Missing_H_in_Water
diff --git a/cell2mol/refcell.py b/cell2mol/refcell.py
@@ -21,41 +21,60 @@ def process_refcell(input_path, name, current_dir, debug=0):
     ref_cell_fname = os.path.join(current_dir, f"Ref_Cell_{name}.cell")
     output_fname = os.path.join(current_dir, "cell2mol.out")
     summary_fname = os.path.join(current_dir, "summary.out")
-
-    if os.path.exists(ref_cell_fname):
-        with open(output_fname, "a") as output:
-            with redirect_stdout(output):
-                print("=====================================================")
-                print(f"cell2mol version {VERSION}")
-                print(f"Reference cell file {ref_cell_fname} already exists. Skipping reference cell generation.")
-                print(f"Debug level: {debug}")
-                refcell = np.load(ref_cell_fname, allow_pickle=True)
-                if refcell.error_case == 0:
-                    get_unique_species_in_reference(refcell, debug) 
-                else:
-                    print(f"Error occurred in processing reference cell: error case {refcell.error_case}")   
-                refcell.save(ref_cell_fname)
-    else:
-        with open(output_fname, "w") as output:
-            with redirect_stdout(output):
-                print(f"cell2mol version {VERSION}")
-                print(f"INITIATING cell object from input path: {input_path}")
-                print(f"Debug level: {debug}")
+    with open(output_fname, "w") as output:
+        with redirect_stdout(output):
+            print(f"cell2mol version {VERSION}")
+            print(f"INITIATING cell object from input path: {input_path}")
+            print(f"Debug level: {debug}")
+
+            # # Read .cif file
+            structure = read(input_path)
+            cell_vector = structure.cell.array
+            cell_param = structure.cell.cellpar()      
+
+            # Create the reference cell
+            refcell = create_reference(input_path, name, cell_vector, cell_param, debug)
+
+            # Finalize and save the reference cell object if no errors
+            if refcell.error_case == 0:
+                get_unique_species_in_reference(refcell, debug) 
+            else:
+                print(f"Error occurred in processing reference cell: error case {refcell.error_case}")
+            refcell.save(ref_cell_fname)   
+    # if os.path.exists(ref_cell_fname):
+    #     with open(output_fname, "a") as output:
+    #         with redirect_stdout(output):
+    #             print("=====================================================")
+    #             print(f"cell2mol version {VERSION}")
+    #             print(f"Reference cell file {ref_cell_fname} already exists. Skipping reference cell generation.")
+    #             print(f"Debug level: {debug}")
+    #             refcell = np.load(ref_cell_fname, allow_pickle=True)
+    #             if refcell.error_case == 0:
+    #                 get_unique_species_in_reference(refcell, debug) 
+    #             else:
+    #                 print(f"Error occurred in processing reference cell: error case {refcell.error_case}")   
+    #             refcell.save(ref_cell_fname)
+    # else:
+    #     with open(output_fname, "w") as output:
+    #         with redirect_stdout(output):
+    #             print(f"cell2mol version {VERSION}")
+    #             print(f"INITIATING cell object from input path: {input_path}")
+    #             print(f"Debug level: {debug}")
 
-                # # Read .cif file
-                structure = read(input_path)
-                cell_vector = structure.cell.array
-                cell_param = structure.cell.cellpar()      
+    #             # # Read .cif file
+    #             structure = read(input_path)
+    #             cell_vector = structure.cell.array
+    #             cell_param = structure.cell.cellpar()      
 
-                # Create the reference cell
-                refcell = create_reference(input_path, name, cell_vector, cell_param, debug)
+    #             # Create the reference cell
+    #             refcell = create_reference(input_path, name, cell_vector, cell_param, debug)
 
-                # Finalize and save the reference cell object if no errors
-                if refcell.error_case == 0:
-                    pass
-                else:
-                    print(f"Error occurred in processing reference cell: error case {refcell.error_case}")
-                refcell.save(ref_cell_fname)
+    #             # Finalize and save the reference cell object if no errors
+    #             if refcell.error_case == 0:
+    #                 pass
+    #             else:
+    #                 print(f"Error occurred in processing reference cell: error case {refcell.error_case}")
+    #             refcell.save(ref_cell_fname)
 
     error_fname = os.path.join(current_dir, f"reference_error_{refcell.error_case}.out")
     with open(error_fname, "w") as error_output:

diff --git a/cell2mol/xyz2mol.py b/cell2mol/xyz2mol.py
@@ -512,9 +512,13 @@ def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True):
         #    print("Possible valences for:", atomicNum,"are",possible_valence, valence)
         if len(possible_valence) == 0:
             element = elemdatabase.elementsym[atomicNum]
-            if elemdatabase.elementperiod[element] < 3 :
+            if elemdatabase.elementgroup[element] == 1 or elemdatabase.elementgroup[element] == 2 : # Alkali and Alkaline earth metals
                 print('WARNING!! Valence of atom', element, i,\
-                    'is',valence,'which bigger than allowed max',max(atomic_valence[atomicNum]),'. Stopping')
+                    'is', valence,'which is bigger than allowed max',max(atomic_valence[atomicNum]),'. Stopping')
+                possible_valence.append(valence)
+            elif elemdatabase.elementperiod[element] < 3 :
+                print('WARNING!! Valence of atom', element, i,\
+                    'is', valence,'which bigger than allowed max',max(atomic_valence[atomicNum]),'. Stopping')
                 possible_valence.append(valence)
                 wrong += 1
             else: