diff --git a/cell2mol/c2m_driver.py b/cell2mol/c2m_driver.py index 63e377da..95ba6894 100644 --- a/cell2mol/c2m_driver.py +++ b/cell2mol/c2m_driver.py @@ -8,6 +8,7 @@ from cell2mol.c2m_module import save_cell, cell2mol from cell2mol.cif2info import cif_2_info from cell2mol.classes import * +from cell2mol.readwrite import readinfo if __name__ != "__main__" and __name__ != "cell2mol.c2m_driver": sys.exit(1) @@ -37,9 +38,18 @@ ## If the input is a .cif file, then it is converted to a .info file using cif_2_info from cif2cell if extension == ".cif": + # TODO: Pre-filtering of the .cif file + with open(input_path, 'r') as ciffile: + if 'radical' in ciffile.read(): + sys.exit(0) + elif '_atom_site_fract_x' not in ciffile.read(): + sys.exit(0) + elif '?' in ciffile.read(): + sys.exit(0) + else: pass + errorpath = os.path.join(current_dir, "cif2cell.err") infopath = os.path.join(current_dir, "{}.info".format(name)) - # TODO: Pre-filtering of the .cif file # if error exist : sys.exit(1) # Create .info file cif_2_info(input_path, infopath, errorpath) diff --git a/cell2mol/classes.py b/cell2mol/classes.py index 82939c43..8bd69b67 100644 --- a/cell2mol/classes.py +++ b/cell2mol/classes.py @@ -199,6 +199,40 @@ def get_possible_cs(self, debug: int=0): if not hasattr(self,"protonation_states"): self.get_protonation_states(debug=debug) if self.protonation_states is not None: self.possible_cs = get_poscharges(self, debug=debug) return self.possible_cs + + ############ + def create_bonds(self, debug: int=0): + if not hasattr(self,"rdkit_mol"): self.parent.assign_charges() + for idx, atom in enumerate(self.atoms): + # Security Check. Confirms that the labels are the same + #if debug >= 2: print("BUILD BONDS: atom", idx, a.label) + rdkitatom = self.rdkit_mol.GetAtomWithIdx(idx) + tmp = rdkitatom.GetSymbol() + if atom.label != tmp: print("Error in Build_Bonds. Atom labels do not coincide. GMOL vs. MOL:", atom.label, tmp) + else: + # First part. Creates bond information + for b in rdkitatom.GetBonds(): + bond_startatom = b.GetBeginAtomIdx() + bond_endatom = b.GetEndAtomIdx() + bond_order = b.GetBondTypeAsDouble() + + if (self.subtype == "ligand") and (bond_startatom >= self.natoms or bond_endatom >= self.natoms): + continue + else: + if self.atoms[bond_endatom].label != self.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol(): + if debug >= 1: + print("Error with Bond EndAtom", self.atoms[bond_endatom].label, self.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol()) + else: + if bond_endatom == idx: + start = bond_endatom + end = bond_startatom + elif bond_startatom == idx: + start = bond_startatom + end = bond_endatom + + ## This has changed. Now there is a bond object, and we send the atom objects, not only the index + new_bond = bond(self.atoms[start], self.atoms[end], bond_order) + atom.add_bond(new_bond) ############ def print_xyz(self): @@ -295,8 +329,6 @@ def split_complex(self, debug: int=0): if hasattr(self,"frac_coord"): lig_frac_coord = extract_from_list(b, rest_frac, dimension=1) newligand.set_fractional_coord(lig_frac_coord) - #newligand = check_metal_coordinating_atoms (newligand, debug=debug) - # newligand.check_metal_coordinating_atoms(debug=debug) # TODO: Implement this function to ligand class self.ligands.append(newligand) ## Arranges Metals @@ -518,8 +550,6 @@ def check_denticity(self, debug: int=0): else: if debug > 0: print(f"GROUP.check_denticity: corrected mconnec of atom {idx} with label {a.label}") a.reset_mconnec(idx) - - # TODO : check whether assgined denticifiy is correct. otherwise, correct it. if self.is_haptic: self = coordination_correction_for_haptic(self, debug=debug) if self.is_haptic == False : self = coordination_correction_for_nonhaptic(self, debug=debug) self.checked_denticity = True @@ -560,6 +590,16 @@ def __init__(self, label: str, coord: list, parent_index: int=None, radii: float if parent is not None: self.occurence = parent.get_occurrence(self) if frac_coord is not None: self.frac_coord = frac_coord + ####################################################### + def check_connectivity(self, other: object, debug: int=0): + ## Checks whether two atoms are connected (through the adjacency) + if not isinstance(other, type(self)): return False + labels = list([self.label,other.label]) + coords = list([self.coord,other.coord]) + isgood, adjmat, adjnum = get_adjmatrix(labels, coords) + if isgood and adjnum[0] > 0: return True + else: return False + ####################################################### def add_bond(self,newbond: object): if not hasattr(self,"bonds"): self.bonds = [] @@ -750,8 +790,8 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor: self.refmoleclist = [] for b in blocklist: mol_labels = extract_from_list(b, ref_labels, dimension=1) - mol_coord = extract_from_list(b, ref_pos, dimension=1) - mol_frac_coord = extract_from_list(b, self.frac_coord, dimension=1) + mol_coord = extract_from_list(b, ref_pos, dimension=1) + mol_frac_coord = extract_from_list(b, self.frac_coord, dimension=1) newmolec = molecule(mol_labels, mol_coord) newmolec.set_fractional_coord(mol_frac_coord) # This must be below the frac_coord, so they are carried on to the ligands @@ -793,7 +833,7 @@ def get_moleclist(self, blocklist=None): self.moleclist = [] for b in blocklist: mol_labels = extract_from_list(b, self.labels, dimension=1) - mol_coord = extract_from_list(b, self.coord, dimension=1) + mol_coord = extract_from_list(b, self.coord, dimension=1) mol_radii = extract_from_list(b, self.radii, dimension=1) newmolec = molecule(mol_labels, mol_coord, b, mol_radii, parent=self) # If fractional coordinates are available... @@ -883,29 +923,26 @@ def assign_charges(self, debug: int=0) -> object: if self.is_fragmented: return None # Stopping. self.is_fragmented must be false to determine the charges of the cell # (1) Indentify unique chemical species - unique_species, unique_indices = identify_unique_species(self.moleclist, debug=debug) - if debug >= 1: print(f"{len(unique_species)} Species (Metal or Ligand or Molecules) to Characterize") - self.speclist = [spec[1] for spec in unique_species] # spec is a list in which item 1 is the actual unique specie + self.speclist, self.unique_indices = identify_unique_species(self.moleclist, debug=debug) + if debug >= 1: print(f"{len(self.speclist)} Species (Metal or Ligand or Molecules) to Characterize") # (2) Gets a preliminary list of possible charge states for each specie (former drive_poscharge) selected_cs = [] - for idx, spec in enumerate(unique_species): + for idx, spec in enumerate(self.speclist): tmp = spec.get_possible_cs(debug=debug) - if tmp is None: - self.error_empty_poscharges = True; return None # Empty list of possible charges received. Stopping - else: - self.error_empty_poscharges = False - if spec.subtype == "metal": selected_cs.append([]) ## I don't like to add an empty list - else: selected_cs.append(tmp) + if tmp is None: self.error_empty_poscharges = True; return None # Empty list of possible charges received. Stopping + if spec.subtype == "metal": selected_cs.append([]) ## I don't like to add an empty list + else: selected_cs.append(tmp) + self.error_empty_poscharges = False # Finds the charge_state that satisfies that the crystal must be neutral - final_charge_distribution = balance_charge(unique_indices, unique_species, debug=debug) + final_charge_distribution = balance_charge(self.unique_indices, self.speclist, debug=debug) if len(final_charge_distribution) > 1: if debug >= 1: print("More than one Possible Distribution Found:", final_charge_distribution) self.error_multiple_distrib = True self.error_empty_distrib = False - pp_mols, pp_idx, pp_opt = prepare_unresolved(unique_indices, unique_species, final_charge_distribution, debug=debug) + pp_mols, pp_idx, pp_opt = prepare_unresolved(self.unique_indices, self.speclist, final_charge_distribution, debug=debug) self.data_for_postproc(pp_mols, pp_idx, pp_opt) return None elif len(final_charge_distribution) == 0: # @@ -919,19 +956,56 @@ def assign_charges(self, debug: int=0) -> object: print("#########################################") print("Assigning Charges and Preparing Molecules") print("#########################################") - self.moleclist, self.error_prepare_mols = prepare_mols(self.moleclist, unique_indices, unique_species, selected_charge_states, final_charge_distribution[0], debug=debug) - + self.moleclist, self.error_prepare_mols = prepare_mols(self.moleclist, self.unique_indices, self.speclist, selected_cs, final_charge_distribution[0], debug=debug) if self.error_prepare_mols: return None # Error while preparing molecules - for mol in self.moleclist: - mol.build_bonds(debug=debug) ## TODO: Adapt build_bonds function to specie class - return self.moleclist + + ####################################################### + def create_bonds(self, debug: int=0): + if not hasattr(self,"error_prepare_mols"): self.assign_charges(debug=debug) + if self.error_prepare_mols: return None # Stopping. self.error_prepare_mols must be false to create the spin + for mol in self.moleclist: + # First part + if not mol.iscomplex: + mol.create_bonds(debug=debug) ### Creates bonds between molecule.atoms using the molecule.rdkit_object + # Second part + if mol.iscomplex: + for lig in mol.ligands: + lig.create_bonds(debug=debug) ### Creates bonds between ligand.atoms, which also belong to molecule.atoms, using the ligand.rdkit_object + + # Third Part. Adds Metal-Ligand Bonds, with an arbitrary 0.5 order: + if mol.iscomplex: + for lig in mol.ligands: + for at in lig.atoms: + count = 0 + for met in mol.metals: + isconnected = check_connectivity(at, met) + if isconnected: + newbond = bond(at, met, 0.5) + count += 1 + if count != at.mconnec: + print(f"CELL.CREATE_BONDS: error creating bonds for atom: \n{atom}\n of ligand: \n{lig}\n") + print(f"CELL.CREATE_BONDS: count differs from atom.mconnec: {count}, {at.mconnec}") + + # Adds Metal-Metal Bonds, with an arbitrary 0.5 order: + + # Fourth part : correction smiles of ligands + mol.smiles_with_H = [lig.smiles for lig in mol.ligands] + for lig in mol.ligands: + lig.correction_smiles() + + ####################################################### + def correction_smiles (self): + if not hasattr(self.parent,"smiles"): self.parent.smiles = [] + self.smiles, self.rdkit_mol = correct_smiles_ligand(self) + self.parent.smiles.append(self.smiles) + ####################################################### def assign_spin(self, debug: int=0) -> object: - if not hasattr(self,"is_preparemol"): self.determine_charge(debug=debug) - if self.is_preparemol: return None # Stopping. self.is_preparemol must be false to assign the spin + if not hasattr(self,"error_prepare_mols"): self.assign_charges(debug=debug) + if self.error_prepare_mols: return None # Stopping. self.error_prepare_mols must be false to assign the spin for mol in self.moleclist: if mol.iscomplex: for metal in mol.metals: diff --git a/cell2mol/coordination_sphere.py b/cell2mol/coordination_sphere.py index 0846a085..cbdc7e20 100644 --- a/cell2mol/coordination_sphere.py +++ b/cell2mol/coordination_sphere.py @@ -163,8 +163,6 @@ def shape_measure (symbols: list, positions: list, debug: int=0) -> dict: ####################################################### ### Make corrcetion for coordination sphere ### ####################################################### -# TODO: will be done in the future -####################################################### covalent_factor_for_metal_v2 = { 'H': 1.19, 'D': 1.19, @@ -401,4 +399,6 @@ def coordination_correction_for_haptic (group, debug=1) -> list: if count == 0 : return group else : group.get_hapticity() - return group \ No newline at end of file + return group + +####################################################### \ No newline at end of file diff --git a/cell2mol/yuri_formal_charge.py b/cell2mol/yuri_formal_charge.py index 49e89c09..c02ba234 100644 --- a/cell2mol/yuri_formal_charge.py +++ b/cell2mol/yuri_formal_charge.py @@ -6,6 +6,10 @@ from collections import defaultdict import itertools import sys +from cell2mol.hungarian import reorder +from cell2mol.xyz2mol import xyz2mol + +from cell2mol.classes import specie, molecule, ligand, atom, metal, group, bond elemdatabase = ElementData() @@ -93,7 +97,7 @@ def get_poscharges(spec: object, debug: int=0): selected_charge_states = [] ############################## - #### Evaluates possible charges except if the ligand is a nitrosyl + #### Evaluates possible charges ############################## for prot in spec.protonation_states: charge_states = [] @@ -238,7 +242,7 @@ def get_protonation_states(specie: object, debug: int=0) -> list: elif specie.subtype == "molecule" and specie.iscomplex == True: return None elif specie.subtype == "molecule" and specie.iscomplex == False: if debug >= 2: print(f" POSCHARGE: doing empty PROTONATION for this specie") - empty_protonation = protonation(specie.labels, specie.coord, specie.cov_factor, int(0), [], [], [], [], typ="Empty") + empty_protonation = protonation(specie.labels, specie.coord, specie.cov_factor, int(0), [], [], [], [], typ="Empty", parent=specie) return list(empty_protonation) ## If specie.subtype == "ligand": @@ -404,18 +408,20 @@ def get_protonation_states(specie: object, debug: int=0) -> list: elif a.connec >= 1: block[idx] = 1 # Oxygen - elif a.label == "O" or a.label == "S" or a.label == "Se": + elif a.label == "O" : if a.connec == 1: needs_nonlocal = True non_local_groups += 1 if debug >= 2: print(f" GET_PROTONATION_STATES: will be sent to nonlocal due to {a.label} atom") elif a.connec > 1: block[idx] = 1 - # SERGI: I'm considering a different one with S and Se - # elif a.label == "S" or a.label == "Se": - # if a.connec == 1: - # elemlist[idx] = "H" - # addedlist[idx] = 1 + # Sulfur and Selenium + elif a.label == "S" or a.label == "Se": + if a.connec == 1: + elemlist[idx] = "H" + addedlist[idx] = 1 + elif a.connec > 1: + block[idx] = 1 # Hydrides elif a.label == "H": if a.connec == 0: @@ -516,7 +522,7 @@ def get_protonation_states(specie: object, debug: int=0) -> list: ############################ if not needs_nonlocal: - new_prot = protonation(newlab, newcoord, ligand.cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist) + new_prot = protonation(newlab, newcoord, ligand.cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist, parent=specie) protonation_states.append(new_prot) else: # Generate the new adjacency matrix after local elements have been added to be sent to xyz2mol @@ -524,7 +530,7 @@ def get_protonation_states(specie: object, debug: int=0) -> list: local_coords = newcoord.copy() local_radii = get_radii(local_labels) local_natoms = len(local_labels) - local_atnums = [int_atom(label) for label in local_labels] # from xyz2mol.py + #local_atnums = [int_atom(label) for label in local_labels] # from xyz2mol.py dummy, local_adjmat, local_adjnum = get_adjmatrix(local_labels, local_coords, ligand.cov_factor, local_radii) local_addedlist = addedlist.copy() @@ -596,7 +602,7 @@ def get_protonation_states(specie: object, debug: int=0) -> list: smi = " " - new_prot = protonation(newlab, newcoord, ligand.cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist, smi, os, typ="Non-local") + new_prot = protonation(newlab, newcoord, ligand.cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist, smi, os, typ="Non-local", parent=specie) if new_prot.status == 1 and new_prot.added_atoms == os+local_added_atoms: protonation_states.append(new_prot) if debug >= 2: print(f" GET_PROTONATION_STATES: Protonation SAVED with {added_atoms} atoms added to ligand. status={new_prot.status}") @@ -606,19 +612,15 @@ def get_protonation_states(specie: object, debug: int=0) -> list: return protonation_states ####################################################### -def get_charge(labels: list, pos: list, adjmat: np.ndarray, ich: int, cov_factor: float=1.3, allow: bool=True, debug: int=0) -> list: - ## Generates the connectivity of a molecule given a charge. - # The molecule is described by the labels, and the atomic cartesian coordinates "pos" - # The adjacency matrix is also provided (adjmat) - #:return iscorrect: boolean variable with a notion of whether the function delivered a good=True or bad=False connectivity - #:return total_charge: total charge associated with the connectivity - #:return atom_charge: atomic charge for each atom of the molecule - #:return mols: rdkit molecule object - #:return smiles: smiles representation of the molecule +def get_charge(ich: int, prot: object, allow: bool=True, debug: int=0): + ## Generates the connectivity of a molecule given a desired charge (ich). + # The molecule is described by a protonation states that has labels, and the atomic cartesian coordinates "coords" + # The adjacency matrix is also provided in the protonation state(adjmat) + #:return charge_state which is an object with the necessary information for other functions to handle the result pt = Chem.GetPeriodicTable() # needed to retrieve the default valences in the 2nd and 3rd checks - natoms = len(labels) - atnums = [elemdatabase.elementnr[label] for label in labels] # from xyz2mol + natoms = prot.natoms + atnums = prot.atnums ########################## # xyz2mol is called here # @@ -627,27 +629,40 @@ def get_charge(labels: list, pos: list, adjmat: np.ndarray, ich: int, cov_factor # allow_charged_fragments is necessary for non-neutral molecules # embed_chiral shouldn't ideally be necessary, but it runs a sanity check that improves the proposed connectivity # use_huckel false means that the xyz2mol adjacency will be generated based on atom distances and vdw radii. - # Ideally, the adjacency matrix could be provided + # instead of use_huckel, we provide the adjacency matrix - mols = xyz2mol(atnums,pos,adjmat,cov_factor,charge=ich,use_graph=True,allow_charged_fragments=allow,embed_chiral=True,use_huckel=False) + mols = xyz2mol(atnums,prot.coords,prot.adjmat,prot.cov_factor,charge=ich,use_graph=True,allow_charged_fragments=allow,embed_chiral=True,use_huckel=False) if len(mols) > 1: print("WARNING: More than 1 mol received from xyz2mol for initcharge:", ich) - # smiles is generated with rdkit + # Smiles are generated with rdkit smiles = Chem.MolToSmiles(mols[0]) - # Here, the atom charge is retrieved, and the connectivity of each atom goes through 3 checks. - # The variable iscorrect will track whether the overall generated structure is meaningful - iscorrect = True + # Gets the resulting charges atom_charge = [] total_charge = 0 for i in range(natoms): a = mols[0].GetAtomWithIdx(i) # Returns a particular Atom atom_charge.append(a.GetFormalCharge()) + total_charge += a.GetFormalCharge() + # Connectivity is checked + iscorrect = check_rdkit_mol_connectivity(mols[0], prot.natoms, debug=debug) + + # Charge_state is initiated + ch_state = charge_state(iscorrect, total_charge, atom_charge, mols[0], smiles, ich, allow) + + return ch_state + +def check_rdkit_mol_connectivity(mol: object, natoms: int, debug: int=0): + # Here, the atom charge is retrieved, and the connectivity of each atom goes through 3 checks. + # The variable iscorrect will track whether the overall generated structure is meaningful + iscorrect = True + for i in range(natoms): + a = mols[0].GetAtomWithIdx(i) # Returns a particular Atom valence = a.GetTotalValence() # Valence of the atom in the mol object bonds = 0 countaromatic = 0 - for b in a.GetBonds(): # Returns a read-only sequence containing all of the molecule’s Bonds + for b in a.GetBonds(): # Returns a read-only sequence containing all of the molecule' bonds bonds += b.GetBondTypeAsDouble() # total number of bonds (weighted by bond order) of the atom in the mol object # Returns the type of the bond as a double (i.e. 1.0 for SINGLE, 1.5 for AROMATIC, 2.0 for DOUBLE) @@ -656,15 +671,13 @@ def get_charge(labels: list, pos: list, adjmat: np.ndarray, ich: int, cov_factor if countaromatic % 2 != 0: bonds -= 0.5 - total_charge += a.GetFormalCharge() lonepairs = (elemdatabase.valenceelectrons[a.GetSymbol()] - a.GetFormalCharge() - valence) / 2 totalvalenceelectrons = int(bonds) + int(lonepairs) * 2 + a.GetFormalCharge() # Checks the quality of the resulting smiles # First check, the number of lonepairs is computed and should make sense if lonepairs != 0 and lonepairs != 1 and lonepairs != 2 and lonepairs != 3 and lonepairs != 4: - if debug >= 2: - print("GETCHARGE: 2nd Check-lonepairs=", i, a.GetSymbol(), lonepairs) + if debug >= 2: print("GETCHARGE: 2nd Check-lonepairs=", i, a.GetSymbol(), lonepairs) iscorrect = False # RDKIT has some troubles assigning the valence for atoms with aromatic bonds. @@ -673,9 +686,7 @@ def get_charge(labels: list, pos: list, adjmat: np.ndarray, ich: int, cov_factor # Second check, the number of bonds should coincide with the valence. # I know it should be the same, but in bad SMILES they often do not coincide if bonds != valence: - if debug >= 2: - print("GETCHARGE: 1st Check-bonds/valence:",i,a.GetSymbol(),bonds,valence) - iscorrect = False + if debug >= 2: print("GETCHARGE: 1st Check-bonds/valence:",i,a.GetSymbol(),bonds,valence); iscorrect = False if debug >= 2: for b in a.GetBonds(): print(b.GetBondTypeAsDouble(),b.GetBeginAtomIdx(),b.GetEndAtomIdx()) @@ -685,49 +696,40 @@ def get_charge(labels: list, pos: list, adjmat: np.ndarray, ich: int, cov_factor if debug >= 2: print("GETCHARGE: 3rd Check: Valence gives false for atom",i,a.GetSymbol(),"with:",totalvalenceelectrons,elemdatabase.valenceelectrons[a.GetSymbol()]) iscorrect = False - if debug >= 2 and i == 0: - print("ich, atom idx, label, charge, pt.GetDefaultValence(a.GetAtomicNum()), valence, num bonds, num lonepairs, iscorrect") - if debug >= 2: print(ich,i,a.GetSymbol(),a.GetFormalCharge(),pt.GetDefaultValence(a.GetAtomicNum()),valence,int(bonds),int(lonepairs),iscorrect) + if debug >= 2 and i == 0: print("ich, atom idx, label, charge, pt.GetDefaultValence(a.GetAtomicNum()), valence, num bonds, num lonepairs, iscorrect") + if debug >= 2: print(ich,i,a.GetSymbol(),a.GetFormalCharge(),pt.GetDefaultValence(a.GetAtomicNum()),valence,int(bonds),int(lonepairs),iscorrect) - # Creates the charge_state - try: - ch_state = charge_state(iscorrect, total_charge, atom_charge, mols[0], smiles, ich, allow) - except Exception as exc: - if debug >= 1: print(f" GETCHARGE: EXCEPTION in charge_state creation: {exc}") - - return ch_state + return iscorrect ####################################################### -def get_list_of_charges_to_try(spec: list, prot: object, debug: int=0) -> list: +def get_list_of_charges_to_try(prot: object, debug: int=0) -> list: + ### Determines which charges are worth trying for a given specie and a protonation state lchar = [] + spec = prot.parent #### Educated Guess on the Maximum Charge one can expect from the spec[1] - if spec[0] == "molecule": maxcharge = 3 - elif spec[0] == "ligand": + if spec.subtype == "molecule": maxcharge = 3 + elif spec.subtype == "ligand": count_non_connected_O = 0 - for a in spec[1].atoms: + for a in spec.atoms: if a.label == "O" and a.mconnec == 0 and a.connec == 1: count_non_connected_O += 1 - if not spec[1].hapticity: - maxcharge = spec[1].denticity + count_non_connected_O - prot.added_atoms - if debug >= 2: print(f"MAXCHARGE: maxcharge set at {maxcharge} with {spec[1].denticity}+{count_non_connected_O}-{prot.added_atoms}") - else: - maxcharge = 2 + if not spec.is_haptic: + maxcharge = spec.denticity + count_non_connected_O - prot.added_atoms + if debug >= 2: print(f"MAXCHARGE: maxcharge set at {maxcharge} with {spec.denticity}+{count_non_connected_O}-{prot.added_atoms}") + else: maxcharge = 2 + # Cases of same atom being connected to more than one metal - if any(a.mconnec >= 2 for a in spec[1].atoms): - pass - else: - if maxcharge > spec[1].natoms: maxcharge = spec[1].natoms - if maxcharge > 4: maxcharge = 4 - if maxcharge < 2: maxcharge = 2 + if any(a.mconnec >= 2 for a in spec.atoms): pass + else: if maxcharge > spec.natoms: maxcharge = spec.natoms + if maxcharge > 4: maxcharge = 4 ## At most, we try range(-4,5,1) + if maxcharge < 2: maxcharge = 2 ## At leaest, we try range(-2,3,1) if debug >= 2: print(f"MAXCHARGE: maxcharge set at {maxcharge}") # Defines list of charges that will try for magn in range(0, int(maxcharge + 1)): - if magn == 0: - signlist = [1] - elif magn != 0: - signlist = [-1, 1] + if magn == 0: signlist = [1] + elif magn != 0: signlist = [-1, 1] for sign in signlist: ich = int(magn * sign) lchar.append(ich) @@ -745,15 +747,597 @@ def eval_chargelist(atom_charges: list, debug: int=0) -> Tuple[np.ndarray, np.nd else: zwitt = False return abstotal, abs_atcharge, zwitt + +####################################################### +def check_carbenes(atom: object, ligand: object, molecule: object, debug: int=0) -> Tuple[bool, str, int, int]: + # Function that determines whether a given connected "atom" of a "ligand" of a "molecule" is a carbene + # This function is in progress. Ideally, should be able to identify Fischer, Schrock and N-Heterocyclic Carbenes + # The former two cases probably require rules that involve other ligands in the molecule, hence why the "molecule" is provided + #:return iscarbene: Boolean variable. True/False + #:return element: Type of element that will be later added in the "add_atom" function below + #:return addedlist: List of integers which track in which atom of the ligand we're adding "elements" + #:return metal_electrons: List of integers, similar to addedlist, which track in which atom of the ligand we're counting on metal_electrons. + + # about Metal electrons: This variable is a way to contemplate cases in which the metal atom is actually contributing with electrons to the metal-ligand bond. + # about Metal electrons: In reality, I'm not sure about how to use it correctly, and now is used without much chemical sense + + iscarbene = False + element = "H" + addedlist = 0 + metal_electrons = 0 + + # Initial attempt with Carbenes, but they are much more complex + # Looks for Neighbouring N atoms + list_of_coord_atoms = [] + for i in atom.adjacency: + list_of_coord_atoms.append(ligand.labels[i]) + numN = list_of_coord_atoms.count("N") + + if numN == 2: # it is an N-Heterocyclic carbenes + iscarbene = True + element = "H" + addedlist = 1 + + return iscarbene, element, addedlist, metal_electrons + +####################################################### +def get_metal_poscharges(metal: object, debug: int=0) -> list: + ## Retrieves plausible oxidation states for a given metal + # Data Obtained from: + # Venkataraman, D.; Du, Y.; Wilson, S. R.; Hirsch, K. A.; Zhang, P.; Moore, J. S. A + # Coordination Geometry Table of the D-Block Elements and Their Ions. + # J. Chem. Educ. 1997, 74, 915. + + atnum = elemdatabase.elementnr[metal.label] + + at_charge = defaultdict(list) + # 1st-row transition metals. + at_charge[21] = [3] # Sc + at_charge[22] = [2, 3, 4] # Ti + at_charge[23] = [1, 2, 3, 4, 5] # V + at_charge[24] = [0, 2, 3] # Cr ; including 5 leads to worse results + at_charge[25] = [1, 2, 3] # Mn + at_charge[26] = [2, 3] # Fe + at_charge[27] = [1, 2, 3] # Co + at_charge[28] = [2, 3] # Ni + at_charge[29] = [1, 2] # Cu + at_charge[30] = [2] # Zn + # 2nd-row transition metals. + at_charge[39] = [3] # Y + at_charge[40] = [2, 3, 4] # Zr + at_charge[41] = [1, 3, 4, 5] # Nb + at_charge[42] = [0, 2, 4, 5, 6] # Mo + at_charge[43] = [1, 2, 3, 4, 5] # Tc + at_charge[44] = [2, 3, 4] # Ru + at_charge[45] = [1, 2, 3] # Rh + at_charge[46] = [0, 2] # Pd + at_charge[47] = [1] # Ag + at_charge[48] = [2] # Cd + # 3rd-row transition metals. + at_charge[57] = [] # La + at_charge[72] = [4] # Hf + at_charge[73] = [2, 3, 4, 5] # Ta + at_charge[74] = [0, 2, 4, 5, 6] # W + at_charge[75] = [1, 2, 3, 4, 5, 7] # Re + at_charge[76] = [2, 3, 4, 5, 6] # Os + at_charge[77] = [1, 3] # Ir + at_charge[78] = [0, 2, 4] # Pt + at_charge[79] = [1, 3] # Au + at_charge[80] = [2] # Hg + + poscharges = at_charge[atnum] + + list_of_zero_OS = ["Fe", "Ni", "Ru"] + if metal.label in list_of_zero_OS: + # In some cases, it adds 0 as possible metal charge + # -if it has CO ligands + if any((lig.natoms == 2 and "C" in lig.labels and "O" in lig.labels) for lig in metal.parent.ligands): + if int(0) not in poscharges: + poscharges.append(int(0)) + # -if it has any ligand with hapticity + if any((lig.is_haptic) for lig in metal.parent.ligands): + if int(0) not in poscharges: + poscharges.append(int(0)) + + return poscharges + +####################################################### +def balance_charge(unique_indices: list, unique_species: list, debug: int=0) -> list: + + # Function to Select the Best Charge Distribution for the unique species. + # It accepts multiple charge options for each molecule/ligand/metal (poscharge, etc...). + # NO: It should select the best one depending on whether the final metal charge makes sense or not. + # In some cases, can accept metal oxidation state = 0, if no other makes sense + + iserror = False + iterlist = [] + for idx, spec_tuple in enumerate(unique_species): + spec = spec_tuple[1] + toadd = [] + if len(spec.possible_cs) == 1: + toadd.append(spec.possible_cs[0]) + elif len(spec.possible_cs) > 1: + for tch in spec.possible_cs: + toadd.append(tch) + elif len(spec.possible_cs) == 0: + iserror = True + toadd.append("-") + iterlist.append(toadd) + + if debug >= 2: print("BALANCE: iterlist", iterlist) + if debug >= 2: print("BALANCE: unique_indices", unique_indices) + + if not iserror: + tmpdistr = list(itertools.product(*iterlist)) + if debug >= 2: print("BALANCE: tmpdistr", tmpdistr) + + # Expands tmpdistr to include same species, generating alldistr: + alldistr = [] + for distr in tmpdistr: + tmp = [] + for u in unique_indices: + tmp.append(distr[u]) + alldistr.append(tmp) + if debug >= 2: print("BALANCE: alldistr added:", tmp) + + final_charge_distribution = [] + for idx, d in enumerate(alldistr): + final_charge = np.sum(d) + if final_charge == 0: + final_charge_distribution.append(d) + elif iserror: + if debug >= 1: print("Error found in BALANCE: one species has no possible charges") + final_charge_distribution = [] + + return final_charge_distribution + +####################################################### +def prepare_unresolved(unique_indices: list, unique_species: list, distributions: list, debug: int=0): + list_molecules = [] + list_indices = [] + list_options = [] + if debug >= 2: print("") + + # spec_tuple[0] is the subtype of the specie + # spec_tuple[1] is the specie object + # spec_tuple[2] is the molecule object to which the specie belongs + for idx, spec_tuple in enumerate(unique_species): + if spec_tuple[0] == "metal": + position = [jdx for jdx, uni in enumerate(unique_indices) if uni == idx] + if debug >= 2: print(f"UNRESOLVED: found metal in positions={position} of the distribution") + values = [distr[position[0]] for distr in distributions] + options = list(set(values)) + if debug >= 2: print(f"UNRESOLVED: list of values={values}\n") + if debug >= 2: print(f"UNRESOLVED: options={options}\n") + + if len(options) > 1: + list_molecules.append(spec_tuple[2]) + list_indices.append(spec_tuple[1].parent_index) + list_options.append(options) + + return list_molecules, list_indices, list_options + +####################################################### +def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, selected_charge_states: list, final_charge_distribution: list, debug: int=0) -> Tuple[list, bool]: + # The charge and connectivity of a given specie in the unit cell is only determined for one representative case. i + # For instance, if four molecules "A" are in the unit cell, only one is evaluated in the rest of the code. + # This function ensures that all other "A" molecules in the unit cell end up having the same interpretation (charge and connectivity). + # In some cases, this might be a difficult job, since the connectivity (i.e. Lewis Structure) often depends on the atom ordering, which might change + # Thus, an Hungarian ordering is implemented. + + Warning = False + idxtoallocate = 0 + # prints SUMMARY at start + if debug >= 1: + print(f"PREPARE: {len(selected_charge_states)} selected_charge_states received") + print("") + print(f"PREPARE: {len(moleclist)} molecules to prepare, of types") + for idx, mol in enumerate(moleclist): + print(f"PREPARE: Molecule {idx} is a {mol.subtype} with formula {mol.formula}"), + print("") + + for idx, sel in enumerate(selected_charge_states): + for jdx, opt in enumerate(sel): + chstate = opt[0] + prot = opt[1] + if debug >= 2: print(f"PREPARE: State {idx} and option {jdx}. Target state and protonation received with {chstate.corr_total_charge} and {prot.added_atoms}") + + for idx, mol in enumerate(moleclist): + if debug >= 2: print("") + if debug >= 2: print(f"PREPARE: Molecule {idx} is a {mol.subtype} with formula {mol.formula}"), + + ################################### + ### FOR SOLVENT AND COUNTERIONS ### + ################################### + if mol.subtype == "molecule" and mol.iscomplex == False : + specie = unique_indices[idxtoallocate] + spec_object = unique_species[specie][1] + if debug >= 2: print(f"PREPARE: Specie number={specie} with molecule poscharges: {spec_object.possible_cs}") + if debug >= 2: print(f"PREPARE: Doing molecule {idx} with idxtoallocate: {idxtoallocate}") + + allocated = False + for jdx, ch in enumerate(spec_object.possible_cs): + if final_charge_distribution[idxtoallocate] == ch and not allocated: # If the charge in poscharges coincides with the one for this entry in final_distribution + + tgt_charge_state = selected_charge_states[specie][jdx][0] + tgt_protonation = selected_charge_states[specie][jdx][1] + if debug >= 2: print(f"PREPARE: target state and protonation loaded, with {tgt_charge_state.corr_total_charge} and {tgt_protonation.added_atoms}") + allocated = True + ch_state = get_charge(mol.labels, mol.coord, mol.conmat, ch, mol.cov_factor) + ch_state.correction(tgt_protonation.addedlist, tgt_protonation.metal_electrons, tgt_protonation.elemlist) + + if ch_state.corr_total_charge == tgt_charge_state.corr_total_charge: + mol.charge(ch_state.corr_atom_charges, ch_state.corr_total_charge, ch_state.rdkit_mol, ch_state.smiles) + if debug >= 2: print(f"PREPARE: Success doing molecule {idx}. Created Charge State with total_charge={ch_state.corr_total_charge}") + else: + if debug >= 2: print(f"PREPARE: Error doing molecule {idx}. Created Charge State is different than Target") + Warning = True + + if allocated: + idxtoallocate += 1 + else: + idxtoallocate += 1 + if debug >= 2: print(f"PREPARE: Warning allocating molecule {idx} with {final_charge_distribution[idxtoallocate]} as target charge") + Warning = True + + ########################### + ###### FOR LIGANDS ###### + ########################### + elif mol.iscomplex : + if debug >= 2: print(f"PREPARE: Molecule {moleclist.index(mol)} has {len(mol.ligands)} ligands") + + for kdx, lig in enumerate(mol.ligands): + #while not Warning: + #try: + allocated = False + specie = unique_indices[idxtoallocate] + spec_object = unique_species[specie][1] + if debug >= 2: print("") + if debug >= 2: print(f"PREPARE: Ligand {kdx}, formula: {lig.formula} is specie {specie}") + if debug >= 2: print(f"PREPARE: Ligand poscharges: {spec_object.possible_cs}") + if debug >= 2: print(f"PREPARE: Doing ligand {kdx} with idxtoallocate {idxtoallocate}") + + for jdx, ch in enumerate(spec_object.possible_cs): + if debug >= 2: print(f"PREPARE: Doing {ch} of options {spec_object.possible_cs}. jdx={jdx}") + tgt_charge_state = selected_charge_states[specie][jdx][0] + tgt_protonation = selected_charge_states[specie][jdx][1] + if debug >= 2: print(f"PREPARE: Found Target Prot State with {tgt_protonation.added_atoms} added atoms and {tgt_protonation.os} OS") + if debug >= 2: print(f"PREPARE: addedlist of Target Prot State: {tgt_protonation.addedlist}") + + if final_charge_distribution[idxtoallocate] == ch and not allocated: + allocated = True + + # RE-RUNS the Charge assignation for same-type molecules in the cell + list_of_protonations = get_protonation_states(lig, debug=debug) + # list_of_protonations = define_sites(lig, mol, debug=1) + found_prot = False + + # Hungarian sort + issorted = False + if not lig.hapticity: + # Adding connectivity data to labels to improve the hungarian sort + ligand_data = [] + ref_data = [] + for a in lig.atoms: + ligand_data.append(a.label+str(a.mconnec)) + for a in spec_object.atoms: + ref_data.append(a.label+str(a.mconnec)) + dummy1, dummy2, map12 = reorder(ref_data, ligand_data, spec_object.coord, lig.coord) + + issorted = True + ############### + + for p in list_of_protonations: + if debug >= 2: print(f"PREPARE: evaluating prot state with added_atoms={p.added_atoms}")#, addedlist={p.addedlist}") + if p.os == tgt_protonation.os and p.added_atoms == tgt_protonation.added_atoms and not found_prot: + if issorted: + tmp_addedlist = list(np.array(p.addedlist)[map12]) + else: + tmp_addedlist = p.addedlist + if debug >= 2: print(f"PREPARE: tmp_addedlist={tmp_addedlist}") + if all(tmp_addedlist[idx] == tgt_protonation.addedlist[idx] for idx in range(len(p.addedlist))): + if debug >= 2: print(f"PREPARE: found match in protonation with tmpsmiles:{p.tmpsmiles}") + prot = p + found_prot = True + + #### Evaluates possible charges except if the ligand is a nitrosyl + if found_prot: + if lig.is_nitrosyl: + if lig.NO_type == "Linear": NOcharge = 1 #NOcharge is the charge with which I need to run getcharge to make it work + if lig.NO_type == "Bent": NOcharge = 0 + + ch_state = get_charge(prot.labels, prot.coords, prot.conmat, NOcharge, prot.cov_factor) + ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) + + if debug >= 2: print(f"PREPARE: Found Nitrosyl of type= {lig.NO_type}") + if debug >= 2: print(f"PREPARE: Wanted charge {ch}, obtained: {ch_state.corr_total_charge}") + if debug >= 2: print(f"PREPARE: smiles: {ch_state.smiles}") + else: + if debug >= 2: print(f"PREPARE: Sending getcharge with prot.added_atoms={prot.added_atoms} to obtain charge {ch}") + ch_state = get_charge(prot.labels, prot.coords, prot.conmat, ch+prot.added_atoms, prot.cov_factor, tgt_charge_state.allow) + ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) + + if debug >= 2: print(f"PREPARE: Wanted charge {ch}, obtained: {ch_state.corr_total_charge}") + if debug >= 2: print(f"PREPARE: smiles: {ch_state.smiles}") + + if ch_state.corr_total_charge != ch: + if debug >= 1: print(f"PREPARE: WARNING: total charge obtained without correction {ch_state.corr_total_charge} while it should be {ch}") + Warning = True + else: + lig.charge(ch_state.corr_atom_charges, ch_state.corr_total_charge, ch_state.rdkit_mol, ch_state.smiles) + if debug >= 1: print(f"PREPARE: Success doing ligand {kdx}. Created Charge State with total_charge={ch_state.corr_total_charge}") + + else: + if debug >= 1: print(f"PREPARE: WARNING, I Could not identify the protonation state. I'll try to obtain the desired result") + found_charge_state = False + for prot in list_of_protonations: + list_of_charge_states = [] + list_of_protonations_for_each_state = [] + + tmpobject = ["Ligand", lig, mol] + chargestried = get_list_of_charges_to_try(spec, prot) + for ich in chargestried: + ch_state = get_charge(prot.labels, prot.coords, prot.conmat, ich, prot.cov_factor) + ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) + list_of_charge_states.append(ch_state) + list_of_protonations_for_each_state.append(prot) + if debug >= 1: print(f" POSCHARGE: charge 0 with smiles {ch_state.smiles}") + + if len(list_of_charge_states) > 0: + best_charge_distr_idx = select_charge_distr(list_of_charge_states, debug=debug) + else: + if debug >= 1: print(f" POSCHARGE. found EMPTY best_charge_distr_idx for PROTONATION state") + best_charge_distr_idx = [] + + if debug >= 2: print(f" POSCHARGE. best_charge_distr_idx={best_charge_distr_idx}") + for idx in best_charge_distr_idx: + c = list_of_charge_states[idx] + p = list_of_protonations_for_each_state[idx] + if debug >= 2: print(f" POSCHARGE. {c.corr_total_charge}={ch}, {p.added_atoms}={tgt_protonation.added_atoms}") + if c.corr_total_charge == ch and p.added_atoms == tgt_protonation.added_atoms: + lig.charge(c.corr_atom_charges, c.corr_total_charge, c.rdkit_mol, c.smiles) + if debug >= 1: print(f"PREPARE: Success doing ligand {kdx}. Created Charge State with total_charge={c.corr_total_charge}") + found_charge_state = True + + if not found_charge_state: Warning = True + if allocated: + idxtoallocate += 1 + else: + idxtoallocate += 1 + if debug >= 1: print(f"PREPARE: Warning allocating molecule {idx} with {final_charge_distribution[idxtoallocate]} as target charge") + Warning = True + + for kdx, met in enumerate(mol.metals): + specie = unique_indices[idxtoallocate] + spec_object = unique_species[specie][1] + allocated = False + if debug >= 2: print("") + if debug >= 2: print(f"PREPARE: Metal {kdx}, label {met.label} is specie {specie}") + if debug >= 2: print(f"PREPARE: Metal possible_css: {spec_object.possible_cs}") + for jdx, ch in enumerate(spec_object.possible_cs): + if final_charge_distribution[idxtoallocate] == ch and not allocated: + allocated = True + met.charge(ch) + if allocated: + idxtoallocate += 1 + + if not Warning: + # Now builds the Charge Data for the final molecule. Smiles is a list with all ligand smiles separately. + if debug >= 2: print(f"PREPARE: Building Molecule {idx} From Ligand&Metal Information") + tmp_atcharge = np.zeros((mol.natoms)) + tmp_smiles = [] + for lig in mol.ligands: + tmp_smiles.append(lig.smiles) + for kdx, a in enumerate(lig.parent_indices): + tmp_atcharge[a] = lig.atcharge[kdx] + for met in mol.metals: + tmp_atcharge[met.atlist] = met.totcharge + + mol.charge(tmp_atcharge, int(sum(tmp_atcharge)), [], tmp_smiles) + + return moleclist, Warning + +####################################################### +def build_bonds(moleclist: list, debug: int=0) -> list: + ## Builds bond data for all molecules + ## Now that charges are known, we use the rdkit-objects with the correct charge to do that + ## Bond entries are defined in the mol and lig objects + + ####### + # First Part. Creates Bonds for Non-Complex Molecules + ####### + if debug >= 2: print("") + if debug >= 2: print("BUILD_BONDS: Doing 1st Part") + if debug >= 2: print("###########################") + for mol in moleclist: + if not mol.iscomplex: + if debug >= 2: print(f"BUILD BONDS: doing mol with mol.natoms={mol.natoms}") + # Checks that the gmol and rdkit-mol objects have same order + for idx, a in enumerate(mol.atoms): + # Security Check. Confirms that the labels are the same + #if debug >= 2: print("BUILD BONDS: atom", idx, a.label) + rdkitatom = mol.rdkit_mol.GetAtomWithIdx(idx) + tmp = rdkitatom.GetSymbol() + if a.label != tmp: print("Error in Build_Bonds. Atom labels do not coincide. GMOL vs. MOL:", a.label, tmp) + else: + # First part. Creates bond information + for b in rdkitatom.GetBonds(): + bond_startatom = b.GetBeginAtomIdx() + bond_endatom = b.GetEndAtomIdx() + bond_order = b.GetBondTypeAsDouble() + if mol.atoms[bond_endatom].label != mol.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol(): + if debug >= 1: print("Error with Bond EndAtom",mol.atoms[bond_endatom].label,mol.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol()) + else: + if bond_endatom == idx: + start = bond_endatom + end = bond_startatom + elif bond_startatom == idx: + start = bond_startatom + end = bond_endatom + new_bond = bond(mol.atoms[start], mol.atoms[end], bond_order) ## This has changed. Now there is a bond object, and we send the atom objects, not only the index + a.add_bond(new_bond) + + if debug >= 2: print("") + if debug >= 2: print("BUILD_BONDS: Doing 2nd Part") + if debug >= 2: print("###########################") + + ####### + # 2nd Part. Creates Ligand Information + ####### + for mol in moleclist: + if debug >= 2: print(f"BUILD BONDS: doing mol {mol.formula} with Natoms {mol.natoms}") + if mol.iscomplex: + for lig in mol.ligands: + if debug >= 2: print(f"BUILD BONDS: doing ligand {lig.formula}") + + for idx, a in enumerate(lig.atoms): + # Security Check. Confirms that the labels are the same + rdkitatom = lig.rdkit_mol.GetAtomWithIdx(idx) + tmp = rdkitatom.GetSymbol() + if a.label != tmp: print(f"Error in Build_Bonds. Atom labels do not coincide. GMOL vs. MOL: {a.label} {tmp}") + else: + # First part. Creates bond information + for b in rdkitatom.GetBonds(): + bond_startatom = b.GetBeginAtomIdx() + bond_endatom = b.GetEndAtomIdx() + bond_order = b.GetBondTypeAsDouble() + if bond_startatom >= lig.natoms or bond_endatom >= lig.natoms: + continue + else: + if lig.atoms[bond_endatom].label != lig.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol(): + if debug >= 1: print( "Error with Bond EndAtom",lig.atoms[bond_endatom].label,lig.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol()) + else: + if bond_endatom == idx: + start = bond_endatom + end = bond_startatom + elif bond_startatom == idx: + start = bond_startatom + end = bond_endatom + new_bond = bond(mol.atoms[start], mol.atoms[end], bond_order) ## This has changed. Now there is a bond object, and we send the atom objects, not only the index + a.add_bond(new_bond) + + if debug >= 2: print("") + if debug >= 2: print("BUILD_BONDS: Doing 3rd Part") + if debug >= 2: print("###########################") + + ## SERGI: I need to work on that, but I'll do something first + + ######## + ## 3rd Part. Merges Ligand Information into Molecule Object using the atlists + ######## + #for mol in moleclist: + # if debug >= 2: print("BUILD BONDS: doing mol", mol.formula, "with Natoms", mol.natoms) + # if mol.iscomplex: + # allstarts = [] + # allends = [] + # allorders = [] + # # Adds atoms within ligands + # for lig in mol.ligandlist: + # for a in lig.atoms: + # for b in a.bond: + # allstarts.append(lig.atlist[b[0]]) + # allends.append(lig.atlist[b[1]]) + # allorders.append(b[2]) + + # # Adds Metal-Ligand Bonds, with an arbitrary 0.5 order: + # for idx, row in enumerate(mol.mconmat): + # # if debug >= 2: print(row) + # for jdx, val in enumerate(row): + # if val > 0: + # # if debug >= 2: print(idx, jdx, val) + # allstarts.append(idx) + # allends.append(jdx) + # allorders.append(0.5) + + # # I sould work to add Metal-Metal Bonds. Would need to work on the Metal class: + # # Finally, puts everything together, and creates bonds for MOLECULE atom objects + # for idx, a in enumerate(mol.atoms): + # starts = [] + # ends = [] + # orders = [] + # group = [] + # for entry in zip(allstarts, allends, allorders): + # if entry[0] == idx or entry[1] == idx: + # if entry not in group and (entry[1], entry[0], entry[2]) not in group: + # starts.append(entry[0]) + # ends.append(entry[1]) + # orders.append(entry[2]) + # group.append(entry) + + # a.bonds(starts, ends, orders) + + ####### + # 4th Part. Corrects Ligand Smiles to Remove Added H atoms, the old smiles is stored in "lig.smiles_with_H" as it can still be useful + ####### + for mol in moleclist: + if debug >= 2: print("BUILD BONDS: doing mol", mol.formula, "with Natoms", mol.natoms) + if mol.iscomplex : + mol.smiles_with_H = [lig.smiles for lig in mol.ligands] + mol.smiles = [correct_smiles_ligand(lig)[0] for lig in mol.ligands] + # for lig in mol.ligands: + # lig.smiles_with_H = lig.smiles + # lig.smiles, lig.rdkit_mol = correct_smiles_ligand(lig) + # mol.smiles.append(lig.smiles) + # mol.smiles_with_H.append(lig.smiles_with_H) + + return moleclist + +####################################################### +def correct_smiles_ligand(ligand: object): + ## Receives a ligand class object and constructs the smiles and the rdkit_mol object from scratch, using atoms and bond information + + Chem.rdmolops.SanitizeFlags.SANITIZE_NONE + #### Creates an empty editable molecule + rwlig = Chem.RWMol() + + # Adds atoms with their formal charge + for jdx, atom in enumerate(ligand.atoms): + rdkit_atom = Chem.Atom(atom.atnum) + rdkit_atom.SetFormalCharge(int(atom.charge)) + rdkit_atom.SetNoImplicit(True) + rwlig.AddAtom(rdkit_atom) + + # Sets bond information and hybridization + for jdx, atom in enumerate(ligand.atoms): + nbonds = 0 + for bond in atom.bonds: + nbonds += 1 + if bond.order== 1.0: btype = Chem.BondType.SINGLE + elif bond.order == 2.0: btype = Chem.BondType.DOUBLE + elif bond.order == 3.0: btype = Chem.BondType.TRIPLE + elif bond.order == 1.5: + btype = Chem.BondType.AROMATIC + rdkit_atom.SetIsAromatic(True) + if bond.atom1 == jdx and bond.atom2 > jdx: rwlig.AddBond(bond.atom1, bond.atom2, btype) + + if nbonds == 1: hyb = Chem.HybridizationType.S + elif nbonds == 2: hyb = Chem.HybridizationType.SP + elif nbonds == 3: hyb = Chem.HybridizationType.SP2 + elif nbonds == 4: hyb = Chem.HybridizationType.SP3 + else: hyb = Chem.HybridizationType.UNSPECIFIED + rdkit_atom.SetHybridization(hyb) + + # Creates Molecule + obj = rwlig.GetMol() + smiles = Chem.MolToSmiles(obj) + + Chem.SanitizeMol(obj) + Chem.DetectBondStereochemistry(obj, -1) + Chem.AssignStereochemistry(obj, flagPossibleStereoCenters=True, force=True) + Chem.AssignAtomChiralTagsFromStructure(obj, -1) + + return smiles, obj + ####################################################### ################### ### NEW OBJECTS ### ################### class protonation(object): - def __init__(self, labels, coordinates, cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist, tmpsmiles=" ", os=int(0), typ="Local"): + def __init__(self, labels, cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist, tmpsmiles=" ", os=int(0), typ="Local", parent: object=None): self.labels = labels - self.coordinates = coordinates + self.natoms = len(labels) + self.coords = coords self.added_atoms = added_atoms self.addedlist = addedlist self.block = block @@ -763,9 +1347,11 @@ def __init__(self, labels, coordinates, cov_factor, added_atoms, addedlist, bloc self.cov_factor = cov_factor self.os = os self.tmpsmiles = tmpsmiles + self.atnums = [elemdatabase.elementnr[l] for l in labels] # from xyz2mol + self.parent = parent self.radii = get_radii(labels) - self.status, self.adjmat, self.adjnum = get_adjmatrix(self.labels, self.coordinates, self.cov_factor, self.radii) + self.status, self.adjmat, self.adjnum = get_adjmatrix(self.labels, self.coords, self.cov_factor, self.radii) ####################################################### @@ -812,62 +1398,3 @@ def correction(self, addedlist, metal_electrons, elemlist): ####################################################### -def get_metal_poscharges(metal: object, debug: int=0) -> list: - ## Retrieves plausible oxidation states for a given metal - # Data Obtained from: - # Venkataraman, D.; Du, Y.; Wilson, S. R.; Hirsch, K. A.; Zhang, P.; Moore, J. S. A - # Coordination Geometry Table of the D-Block Elements and Their Ions. - # J. Chem. Educ. 1997, 74, 915. - - atnum = elemdatabase.elementnr[metal.label] - - at_charge = defaultdict(list) - # 1st-row transition metals. - at_charge[21] = [3] # Sc - at_charge[22] = [2, 3, 4] # Ti - at_charge[23] = [1, 2, 3, 4, 5] # V - at_charge[24] = [0, 2, 3] # Cr ; including 5 leads to worse results - at_charge[25] = [1, 2, 3] # Mn - at_charge[26] = [2, 3] # Fe - at_charge[27] = [1, 2, 3] # Co - at_charge[28] = [2, 3] # Ni - at_charge[29] = [1, 2] # Cu - at_charge[30] = [2] # Zn - # 2nd-row transition metals. - at_charge[39] = [3] # Y - at_charge[40] = [2, 3, 4] # Zr - at_charge[41] = [1, 3, 4, 5] # Nb - at_charge[42] = [0, 2, 4, 5, 6] # Mo - at_charge[43] = [1, 2, 3, 4, 5] # Tc - at_charge[44] = [2, 3, 4] # Ru - at_charge[45] = [1, 2, 3] # Rh - at_charge[46] = [0, 2] # Pd - at_charge[47] = [1] # Ag - at_charge[48] = [2] # Cd - # 3rd-row transition metals. - at_charge[57] = [] # La - at_charge[72] = [4] # Hf - at_charge[73] = [2, 3, 4, 5] # Ta - at_charge[74] = [0, 2, 4, 5, 6] # W - at_charge[75] = [1, 2, 3, 4, 5, 7] # Re - at_charge[76] = [2, 3, 4, 5, 6] # Os - at_charge[77] = [1, 3] # Ir - at_charge[78] = [0, 2, 4] # Pt - at_charge[79] = [1, 3] # Au - at_charge[80] = [2] # Hg - - poscharges = at_charge[atnum] - - list_of_zero_OS = ["Fe", "Ni", "Ru"] - if metal.label in list_of_zero_OS: - # In some cases, it adds 0 as possible metal charge - # -if it has CO ligands - if any((lig.natoms == 2 and "C" in lig.labels and "O" in lig.labels) for lig in metal.parent.ligands): - if int(0) not in poscharges: - poscharges.append(int(0)) - # -if it has any ligand with hapticity - if any((lig.is_haptic) for lig in metal.parent.ligands): - if int(0) not in poscharges: - poscharges.append(int(0)) - - return poscharges \ No newline at end of file