From 3161d4777c4ace103012c94bd3340fb0e7d0a931 Mon Sep 17 00:00:00 2001 From: Sergi Vela Date: Fri, 23 Feb 2024 17:55:26 +0100 Subject: [PATCH] changes before prepare_mols --- cell2mol/c2m_driver.py | 2 +- cell2mol/cell_operations.py | 35 + ..._reconstruct.py => cell_reconstruction.py} | 0 cell2mol/cellconversions.py | 187 -- ..._formal_charge.py => charge_assignment.py} | 278 +-- cell2mol/classes.py | 100 +- cell2mol/formal_charge.py | 1669 ----------------- cell2mol/old_classes.py | 489 ----- cell2mol/{readwrite.py => read_write.py} | 0 cell2mol/sergi_classes.py | 772 -------- cell2mol/sergi_connectivity.py | 369 ---- cell2mol/sergi_reconstruct.py | 697 ------- cell2mol/tmcharge_common.py | 1085 ----------- 13 files changed, 222 insertions(+), 5461 deletions(-) rename cell2mol/{cell_reconstruct.py => cell_reconstruction.py} (100%) delete mode 100755 cell2mol/cellconversions.py rename cell2mol/{yuri_formal_charge.py => charge_assignment.py} (87%) delete mode 100644 cell2mol/formal_charge.py delete mode 100755 cell2mol/old_classes.py rename cell2mol/{readwrite.py => read_write.py} (100%) delete mode 100644 cell2mol/sergi_classes.py delete mode 100644 cell2mol/sergi_connectivity.py delete mode 100644 cell2mol/sergi_reconstruct.py delete mode 100755 cell2mol/tmcharge_common.py diff --git a/cell2mol/c2m_driver.py b/cell2mol/c2m_driver.py index 95ba6894..15475b84 100644 --- a/cell2mol/c2m_driver.py +++ b/cell2mol/c2m_driver.py @@ -8,7 +8,7 @@ from cell2mol.c2m_module import save_cell, cell2mol from cell2mol.cif2info import cif_2_info from cell2mol.classes import * -from cell2mol.readwrite import readinfo +from cell2mol.cell2mol.read_write import readinfo if __name__ != "__main__" and __name__ != "cell2mol.c2m_driver": sys.exit(1) diff --git a/cell2mol/cell_operations.py b/cell2mol/cell_operations.py index 4fd78c81..76277bd8 100755 --- a/cell2mol/cell_operations.py +++ b/cell2mol/cell_operations.py @@ -116,3 +116,38 @@ def cart2frac(cartCoords, cellvec): newcoord.append([float(newx), float(newy), float(newz)]) return newcoord + +####################################################### +def translate(vector, coords, cellvec): + """ Translate coordinates by a vector + Args: + vector (list): list of vector components + coords (list): list of coordinates + cellvec (list): list of cell vectors + Returns: + newcoord (list): list of translated coordinates + """ + + newcoord = [] + for idx, coord in enumerate(coords): + newx = ( + coord[0] + + vector[0] * cellvec[0][0] + + vector[1] * cellvec[1][0] + + vector[2] * cellvec[2][0] + ) + newy = ( + coord[1] + + vector[0] * cellvec[0][1] + + vector[1] * cellvec[1][1] + + vector[2] * cellvec[2][1] + ) + newz = ( + coord[2] + + vector[0] * cellvec[0][2] + + vector[1] * cellvec[1][2] + + vector[2] * cellvec[2][2] + ) + newcoord.append([float(newx), float(newy), float(newz)]) + + return newcoord \ No newline at end of file diff --git a/cell2mol/cell_reconstruct.py b/cell2mol/cell_reconstruction.py similarity index 100% rename from cell2mol/cell_reconstruct.py rename to cell2mol/cell_reconstruction.py diff --git a/cell2mol/cellconversions.py b/cell2mol/cellconversions.py deleted file mode 100755 index d6c79a3d..00000000 --- a/cell2mol/cellconversions.py +++ /dev/null @@ -1,187 +0,0 @@ -#!/usr/bin/env python - -import numpy as np - -####################################################### -def frac2cart_fromcellvec(frac_coord, cellvec): - """ Convert fractional coordinates to cartesian coordinates - - Args: - frac_coord (list): list of fractional coordinates - cellvec (list): list of cell vectors - - Returns: - cartesian (list): list of cartesian coordinates - """ - - cartesian = [] - for idx, frac in enumerate(frac_coord): - xcar = ( - frac[0] * cellvec[0][0] + frac[1] * cellvec[1][0] + frac[2] * cellvec[2][0] - ) - ycar = ( - frac[0] * cellvec[0][1] + frac[1] * cellvec[1][1] + frac[2] * cellvec[2][1] - ) - zcar = ( - frac[0] * cellvec[0][2] + frac[1] * cellvec[1][2] + frac[2] * cellvec[2][2] - ) - cartesian.append([float(xcar), float(ycar), float(zcar)]) - return cartesian - - -####################################################### -def frac2cart_fromparam(frac_coord, cellparam): - """ Convert fractional coordinates to cartesian coordinates - - Args: - frac_coord (list): list of fractional coordinates - cellparam (list): list of cell parameters - - Returns: - cartesian (list): list of cartesian coordinates - """ - - a = cellparam[0] - b = cellparam[1] - c = cellparam[2] - alpha = np.radians(cellparam[3]) - beta = np.radians(cellparam[4]) - gamma = np.radians(cellparam[5]) - - volume = ( - a - * b - * c - * np.sqrt( - 1 - - np.cos(alpha) ** 2 - - np.cos(beta) ** 2 - - np.cos(gamma) ** 2 - + 2 * np.cos(alpha) * np.cos(beta) * np.cos(gamma) - ) - ) - - m = np.zeros((3, 3)) - m[0][0] = a - m[0][1] = b * np.cos(gamma) - m[0][2] = c * np.cos(beta) - m[1][0] = 0 - m[1][1] = b * np.sin(gamma) - m[1][2] = c * ((np.cos(alpha) - np.cos(beta) * np.cos(gamma)) / np.sin(gamma)) - m[2][0] = 0 - m[2][1] = 0 - m[2][2] = volume / (a * b * np.sin(gamma)) - - cartesian = [] - for idx, frac in enumerate(frac_coord): - xcar = frac[0] * m[0][0] + frac[1] * m[0][1] + frac[2] * m[0][2] - ycar = frac[0] * m[1][0] + frac[1] * m[1][1] + frac[2] * m[1][2] - zcar = frac[0] * m[2][0] + frac[1] * m[2][1] + frac[2] * m[2][2] - cartesian.append([float(xcar), float(ycar), float(zcar)]) - return cartesian - - -####################################################### -def det3(mat): - """ Calculate the determinant of a 3x3 matrix - - Args: - mat (list): list of 3x3 matrix - Returns: - determinant (float): determinant of the matrix - """ - return ( - (mat[0][0] * mat[1][1] * mat[2][2]) - + (mat[0][1] * mat[1][2] * mat[2][0]) - + (mat[0][2] * mat[1][0] * mat[2][1]) - - (mat[0][2] * mat[1][1] * mat[2][0]) - - (mat[0][1] * mat[1][0] * mat[2][2]) - - (mat[0][0] * mat[1][2] * mat[2][1]) - ) - - -####################################################### -def cart2frac(cartCoords, cellvec): - """ Convert cartesian coordinates to fractional coordinates - - Args: - cartCoords (list): list of cartesian coordinates - cellvec (list): list of cell vectors - Returns: - fracCoords (list): list of fractional coordinates - """ - - latCnt = [x[:] for x in [[None] * 3] * 3] - for a in range(3): - for b in range(3): - latCnt[a][b] = cellvec[b][a] - - fracCoords = [] - detLatCnt = det3(latCnt) - - for i in cartCoords: - aPos = ( - det3( - [ - [i[0], latCnt[0][1], latCnt[0][2]], - [i[1], latCnt[1][1], latCnt[1][2]], - [i[2], latCnt[2][1], latCnt[2][2]], - ] - ) - ) / detLatCnt - bPos = ( - det3( - [ - [latCnt[0][0], i[0], latCnt[0][2]], - [latCnt[1][0], i[1], latCnt[1][2]], - [latCnt[2][0], i[2], latCnt[2][2]], - ] - ) - ) / detLatCnt - cPos = ( - det3( - [ - [latCnt[0][0], latCnt[0][1], i[0]], - [latCnt[1][0], latCnt[1][1], i[1]], - [latCnt[2][0], latCnt[2][1], i[2]], - ] - ) - ) / detLatCnt - fracCoords.append([aPos, bPos, cPos]) - return fracCoords - - -####################################################### -def translate(vector, coords, cellvec): - """ Translate coordinates by a vector - Args: - vector (list): list of vector components - coords (list): list of coordinates - cellvec (list): list of cell vectors - Returns: - newcoord (list): list of translated coordinates - """ - - newcoord = [] - for idx, coord in enumerate(coords): - newx = ( - coord[0] - + vector[0] * cellvec[0][0] - + vector[1] * cellvec[1][0] - + vector[2] * cellvec[2][0] - ) - newy = ( - coord[1] - + vector[0] * cellvec[0][1] - + vector[1] * cellvec[1][1] - + vector[2] * cellvec[2][1] - ) - newz = ( - coord[2] - + vector[0] * cellvec[0][2] - + vector[1] * cellvec[1][2] - + vector[2] * cellvec[2][2] - ) - newcoord.append([float(newx), float(newy), float(newz)]) - - return newcoord diff --git a/cell2mol/yuri_formal_charge.py b/cell2mol/charge_assignment.py similarity index 87% rename from cell2mol/yuri_formal_charge.py rename to cell2mol/charge_assignment.py index c02ba234..7944e336 100644 --- a/cell2mol/yuri_formal_charge.py +++ b/cell2mol/charge_assignment.py @@ -31,71 +31,12 @@ # print("RDKIT Version:", rdBase.rdkitVersion) rdBase.DisableLog("rdApp.*") -############################################################################### -def identify_unique_species(moleclist: list, debug: int=0): - - unique_species = [] - unique_indices = [] - - typelist_mols = [] - typelist_ligs = [] - typelist_mets = [] - - specs_found = -1 - for idx, mol in enumerate(moleclist): - if debug >= 2: print(f"Molecule {idx} formula={mol.formula}") - if not mol.iscomplex: - found = False - for ldx, typ in enumerate(typelist_mols): # Molecules - issame = compare_species(mol, typ[0], debug=1) - if issame : - found = True ; kdx = typ[1] - if debug >= 2: print(f"Molecule {idx} is the same with {ldx} in typelist") - if not found: - specs_found += 1 ; kdx = specs_found - typelist_mols.append(list([mol, kdx])) - unique_species.append(mol) - if debug >= 2: print(f"New molecule found with: formula={mol.formula} and added in position {kdx}") - unique_indices.append(kdx) - - else: - for jdx, lig in enumerate(mol.ligands): # ligands - found = False - for ldx, typ in enumerate(typelist_ligs): - issame = compare_species(lig, typ[0], debug=1) - if issame : - found = True ; kdx = typ[1] - if debug >= 2: print(f"ligand {jdx} is the same with {ldx} in typelist") - if not found: - specs_found += 1 ; kdx = specs_found - typelist_ligs.append(list([lig, kdx])) - unique_species.append(lig) - if debug >= 2: print(f"New ligand found with: formula {lig.formula} and denticity={lig.denticity}, and added in position {kdx}") - unique_indices.append(kdx) - - for jdx, met in enumerate(mol.metals): # metals - found = False - for ldx, typ in enumerate(typelist_mets): - issame = compare_metals(met, typ[0], debug=1) - if issame : - found = True ; kdx = typ[1] - if debug >= 2: print(f"Metal {jdx} is the same with {ldx} in typelist") - if not found: - specs_found += 1 ; kdx = specs_found - typelist_mets.append(list([met, kdx])) - unique_species.append(met) - if debug >= 2: print(f"New Metal Center found with: labels {met.label} and added in position {kdx}") - unique_indices.append(kdx) - - return unique_species, unique_indices - ####################################################### -def get_poscharges(spec: object, debug: int=0): +def get_possible_cs(spec: object, debug: int=0): if not hasattr(spec,"protonation_states"): spec.get_protonation_states(debug=debug) if spec.protonation_states is None: return None if spec.subtype == "group" or (spec.subtype == 'molecule' and spec.is_complex): return None - selected_charge_states = [] ############################## #### Evaluates possible charges ############################## @@ -105,31 +46,30 @@ def get_poscharges(spec: object, debug: int=0): if debug >= 2: print(f" POSCHARGE will try charges {target_charges}") for ich in target_charges: - ch_state = get_charge(ich, prot) - ch_state.correction(prot) + ch_state = get_charge(ich, prot) ## Protonation is passed to the ch_state object (ch_state.protonation) charge_states.append(ch_state) #list_of_protonations_for_each_state.append(prot) if debug >= 2: print(f" POSCHARGE: charge 0 with smiles {ch_state.smiles}") if spec.subtype == "ligand": if spec.is_nitrosyl: - if spec.NO_type == "Linear": best_charge_state = charge_states[2] ## When Nitrosyl, we sistematically get the correct charge_distribution in [2] and [0] for Linear and Bent respectively - elif spec.NO_type == "Bent": best_charge_state = charge_states[0] - else: best_charge_state = select_charge_distr(charge_states, debug=debug) ## For ligands other than nitrosyl - else: best_charge_state = select_charge_distr(charge_states, debug=debug) ## For organic molecules - - ############################# - # For all protonations, it adds the resulting states to selected_charge_states - ############################# - found = False - for cs in spec.possible_cs: - if cs.corr_total_charge == best_charge_state.corr_total_charge: found = True - if not found: ## IMPORTANT. We only add possible states if the possible charge is already not considered for the specie. - selected_charge_states.append(best_charge_state) ## That is, we only consider one possible connectivity for each possible charge + if spec.NO_type == "Linear": possible_cs = charge_states[2] ## When Nitrosyl, we sistematically get the correct charge_distribution in [2] and [0] for Linear and Bent respectively + elif spec.NO_type == "Bent": possible_cs = charge_states[0] + else: possible_cs = select_charge_distr(charge_states, debug=debug) ## For ligands other than nitrosyl + else: possible_cs = select_charge_distr(charge_states, debug=debug) ## For organic molecules + + ## I think this is not necessary -> ############################# + ## I think this is not necessary -> # For all protonations, it adds the resulting states to selected_charge_states + ## I think this is not necessary -> ############################# + ## I think this is not necessary -> found = False + ## I think this is not necessary -> for cs in spec.possible_cs: + ## I think this is not necessary -> if cs.corr_total_charge == possible_cs.corr_total_charge: found = True + ## I think this is not necessary -> if not found: ## IMPORTANT. We only add possible states if the possible charge is already not considered for the specie. + ## I think this is not necessary -> selected_charge_states.append(possible_cs) ## That is, we only consider one possible connectivity for each possible charge ### HERE IS HAS FINISHED WITH ALL PROTONATIONS - if len(selected_charge_states) == 0: return None - else: return selected_charge_states + if len(possible_cs) == 0: return None + else: return possible_cs ####################################################### def select_charge_distr(charge_states: list, debug: int=0) -> list: @@ -211,26 +151,26 @@ def select_charge_distr(charge_states: list, debug: int=0) -> list: corr_charges.append(charge_states[idx].corr_total_charge) if debug >= 2: print(f" NEW SELECT FUNCTION: found corr_charges={corr_charges}") - good_idx = [] + good_states = [] for jdx, tgt_charge in enumerate(corr_charges): if debug >= 2: print(f" NEW SELECT FUNCTION: doing tgt_charge={tgt_charge}") list_for_tgt_charge = [] for idx in tmplist: if charge_states[idx].corr_total_charge == tgt_charge: - list_for_tgt_charge.append(idx) + list_for_tgt_charge.append(charge_states[idx]) if debug >= 2: print(f" NEW SELECT FUNCTION: charge_state added") # CASE 1, IF only one distribution meets the requirement. Then it is chosen if len(list_for_tgt_charge) == 1: - good_idx.append(list_for_tgt_charge[0]) + good_states.append(list_for_tgt_charge[0]) if debug >= 2: print(f" NEW SELECT FUNCTION: Case 1, only one entry for {tgt_charge} in tmplist") # CASE 2, IF more than one charge_state is found for a given final charge elif len(list_for_tgt_charge) > 1: - good_idx.append(list_for_tgt_charge[0]) + good_states.append(list_for_tgt_charge[0]) if debug >= 2: print(f" NEW SELECT FUNCTION: Case 2, more than one entry for {tgt_charge} in tmplist. Taking first") - return good_idx + return good_states ####################################################### def get_protonation_states(specie: object, debug: int=0) -> list: @@ -618,7 +558,6 @@ def get_charge(ich: int, prot: object, allow: bool=True, debug: int=0): # The adjacency matrix is also provided in the protonation state(adjmat) #:return charge_state which is an object with the necessary information for other functions to handle the result - pt = Chem.GetPeriodicTable() # needed to retrieve the default valences in the 2nd and 3rd checks natoms = prot.natoms atnums = prot.atnums @@ -646,19 +585,21 @@ def get_charge(ich: int, prot: object, allow: bool=True, debug: int=0): total_charge += a.GetFormalCharge() # Connectivity is checked - iscorrect = check_rdkit_mol_connectivity(mols[0], prot.natoms, debug=debug) + iscorrect = check_rdkit_mol_connectivity(mols[0], prot.natoms, ich, debug=debug) # Charge_state is initiated - ch_state = charge_state(iscorrect, total_charge, atom_charge, mols[0], smiles, ich, allow) + ch_state = charge_state(iscorrect, total_charge, atom_charge, mols[0], smiles, ich, allow, prot) return ch_state -def check_rdkit_mol_connectivity(mol: object, natoms: int, debug: int=0): +####################################################### +def check_rdkit_mol_connectivity(mol: object, natoms: int, ich: int, debug: int=0): # Here, the atom charge is retrieved, and the connectivity of each atom goes through 3 checks. # The variable iscorrect will track whether the overall generated structure is meaningful + pt = Chem.GetPeriodicTable() # Needed to retrieve the default valences in the 2nd and 3rd checks iscorrect = True for i in range(natoms): - a = mols[0].GetAtomWithIdx(i) # Returns a particular Atom + a = mol.GetAtomWithIdx(i) # Returns a particular Atom valence = a.GetTotalValence() # Valence of the atom in the mol object bonds = 0 countaromatic = 0 @@ -721,7 +662,8 @@ def get_list_of_charges_to_try(prot: object, debug: int=0) -> list: # Cases of same atom being connected to more than one metal if any(a.mconnec >= 2 for a in spec.atoms): pass - else: if maxcharge > spec.natoms: maxcharge = spec.natoms + else: + if maxcharge > spec.natoms: maxcharge = spec.natoms if maxcharge > 4: maxcharge = 4 ## At most, we try range(-4,5,1) if maxcharge < 2: maxcharge = 2 ## At leaest, we try range(-2,3,1) if debug >= 2: print(f"MAXCHARGE: maxcharge set at {maxcharge}") @@ -918,7 +860,7 @@ def prepare_unresolved(unique_indices: list, unique_species: list, distributions return list_molecules, list_indices, list_options ####################################################### -def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, selected_charge_states: list, final_charge_distribution: list, debug: int=0) -> Tuple[list, bool]: +def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, selected_cs: list, final_charge_distribution: list, debug: int=0) -> Tuple[list, bool]: # The charge and connectivity of a given specie in the unit cell is only determined for one representative case. i # For instance, if four molecules "A" are in the unit cell, only one is evaluated in the rest of the code. # This function ensures that all other "A" molecules in the unit cell end up having the same interpretation (charge and connectivity). @@ -927,66 +869,62 @@ def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, se Warning = False idxtoallocate = 0 - # prints SUMMARY at start - if debug >= 1: - print(f"PREPARE: {len(selected_charge_states)} selected_charge_states received") - print("") - print(f"PREPARE: {len(moleclist)} molecules to prepare, of types") - for idx, mol in enumerate(moleclist): - print(f"PREPARE: Molecule {idx} is a {mol.subtype} with formula {mol.formula}"), - print("") - - for idx, sel in enumerate(selected_charge_states): - for jdx, opt in enumerate(sel): - chstate = opt[0] - prot = opt[1] - if debug >= 2: print(f"PREPARE: State {idx} and option {jdx}. Target state and protonation received with {chstate.corr_total_charge} and {prot.added_atoms}") for idx, mol in enumerate(moleclist): - if debug >= 2: print("") - if debug >= 2: print(f"PREPARE: Molecule {idx} is a {mol.subtype} with formula {mol.formula}"), ################################### ### FOR SOLVENT AND COUNTERIONS ### ################################### if mol.subtype == "molecule" and mol.iscomplex == False : - specie = unique_indices[idxtoallocate] - spec_object = unique_species[specie][1] - if debug >= 2: print(f"PREPARE: Specie number={specie} with molecule poscharges: {spec_object.possible_cs}") - if debug >= 2: print(f"PREPARE: Doing molecule {idx} with idxtoallocate: {idxtoallocate}") + spec = unique_species[mol.unique_index] # This is the reference specie + if debug >= 2: print(f"PREPARE: Doing molecule {idx} with unique_index: {mol.unique_index}") + if debug >= 2: print(f"PREPARE: Specie with poscharges: {spec.possible_cs}") allocated = False - for jdx, ch in enumerate(spec_object.possible_cs): - if final_charge_distribution[idxtoallocate] == ch and not allocated: # If the charge in poscharges coincides with the one for this entry in final_distribution - - tgt_charge_state = selected_charge_states[specie][jdx][0] - tgt_protonation = selected_charge_states[specie][jdx][1] - if debug >= 2: print(f"PREPARE: target state and protonation loaded, with {tgt_charge_state.corr_total_charge} and {tgt_protonation.added_atoms}") - allocated = True - ch_state = get_charge(mol.labels, mol.coord, mol.conmat, ch, mol.cov_factor) - ch_state.correction(tgt_protonation.addedlist, tgt_protonation.metal_electrons, tgt_protonation.elemlist) + for jdx, cs in enumerate(spec.possible_cs): + if final_charge_distribution[idxtoallocate] == cs.corr_total_charge and not allocated: # If the charge in poscharges coincides with the one for this entry in final_distribution + if debug >= 2: print(f"PREPARE: target state and protonation loaded, with {cs.corr_total_charge} and {cs.protonation.added_atoms}") + allocated = True + idxtoallocate += 1 + new_cs = get_charge(mol.labels, mol.coord, mol.conmat, cs.corr_total_charge, mol.cov_factor) - if ch_state.corr_total_charge == tgt_charge_state.corr_total_charge: - mol.charge(ch_state.corr_atom_charges, ch_state.corr_total_charge, ch_state.rdkit_mol, ch_state.smiles) + if new_cs.corr_total_charge == cs.corr_total_charge: + mol.set_charges(new_cs.corr_total_charge, new_cs.corr_atom_charges, new_cs.smiles, new_cs.rdkit_mol) if debug >= 2: print(f"PREPARE: Success doing molecule {idx}. Created Charge State with total_charge={ch_state.corr_total_charge}") else: - if debug >= 2: print(f"PREPARE: Error doing molecule {idx}. Created Charge State is different than Target") - Warning = True - - if allocated: - idxtoallocate += 1 - else: - idxtoallocate += 1 - if debug >= 2: print(f"PREPARE: Warning allocating molecule {idx} with {final_charge_distribution[idxtoallocate]} as target charge") - Warning = True + if debug >= 2: print(f"PREPARE: Error doing molecule {idx}. Created Charge State is different than Target {new_cs.corr_total_charge} vs {cs.corr_total_charge}") + return None ########################### ###### FOR LIGANDS ###### ########################### - elif mol.iscomplex : + elif mol.iscomplex: if debug >= 2: print(f"PREPARE: Molecule {moleclist.index(mol)} has {len(mol.ligands)} ligands") for kdx, lig in enumerate(mol.ligands): + spec = unique_species[lig.unique_index] # This is the reference specie + if debug >= 2: print(f"PREPARE: Doing Ligand {kdx} with unique_index: {lig.unique_index}") + if debug >= 2: print(f"PREPARE: Specie with poscharges: {spec.possible_cs}") + + allocated = False + for jdx, cs in enumerate(spec.possible_cs): + if final_charge_distribution[idxtoallocate] == cs.corr_total_charge and not allocated: # If the charge in poscharges coincides with the one for this entry in final_distribution + if debug >= 2: print(f"PREPARE: target state and protonation loaded, with {cs.corr_total_charge} and {cs.protonation.added_atoms}") + allocated = True + idxtoallocate += 1 + new_cs = get_charge(mol.labels, mol.coord, mol.conmat, cs.corr_total_charge, mol.cov_factor) + + if new_cs.corr_total_charge == cs.corr_total_charge: + mol.set_charges(new_cs.corr_total_charge, new_cs.corr_atom_charges, new_cs.smiles, new_cs.rdkit_mol) + if debug >= 2: print(f"PREPARE: Success doing molecule {idx}. Created Charge State with total_charge={ch_state.corr_total_charge}") + else: + if debug >= 2: print(f"PREPARE: Error doing molecule {idx}. Created Charge State is different than Target {new_cs.corr_total_charge} vs {cs.corr_total_charge}") + + + + + + #while not Warning: #try: allocated = False @@ -999,8 +937,8 @@ def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, se for jdx, ch in enumerate(spec_object.possible_cs): if debug >= 2: print(f"PREPARE: Doing {ch} of options {spec_object.possible_cs}. jdx={jdx}") - tgt_charge_state = selected_charge_states[specie][jdx][0] - tgt_protonation = selected_charge_states[specie][jdx][1] + tgt_charge_state = selected_cs[specie][jdx][0] + tgt_protonation = selected_cs[specie][jdx][1] if debug >= 2: print(f"PREPARE: Found Target Prot State with {tgt_protonation.added_atoms} added atoms and {tgt_protonation.os} OS") if debug >= 2: print(f"PREPARE: addedlist of Target Prot State: {tgt_protonation.addedlist}") @@ -1047,7 +985,6 @@ def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, se if lig.NO_type == "Bent": NOcharge = 0 ch_state = get_charge(prot.labels, prot.coords, prot.conmat, NOcharge, prot.cov_factor) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) if debug >= 2: print(f"PREPARE: Found Nitrosyl of type= {lig.NO_type}") if debug >= 2: print(f"PREPARE: Wanted charge {ch}, obtained: {ch_state.corr_total_charge}") @@ -1055,7 +992,6 @@ def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, se else: if debug >= 2: print(f"PREPARE: Sending getcharge with prot.added_atoms={prot.added_atoms} to obtain charge {ch}") ch_state = get_charge(prot.labels, prot.coords, prot.conmat, ch+prot.added_atoms, prot.cov_factor, tgt_charge_state.allow) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) if debug >= 2: print(f"PREPARE: Wanted charge {ch}, obtained: {ch_state.corr_total_charge}") if debug >= 2: print(f"PREPARE: smiles: {ch_state.smiles}") @@ -1078,7 +1014,6 @@ def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, se chargestried = get_list_of_charges_to_try(spec, prot) for ich in chargestried: ch_state = get_charge(prot.labels, prot.coords, prot.conmat, ich, prot.cov_factor) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) list_of_charge_states.append(ch_state) list_of_protonations_for_each_state.append(prot) if debug >= 1: print(f" POSCHARGE: charge 0 with smiles {ch_state.smiles}") @@ -1329,62 +1264,55 @@ def correct_smiles_ligand(ligand: object): return smiles, obj ####################################################### - -################### -### NEW OBJECTS ### -################### class protonation(object): def __init__(self, labels, cov_factor, added_atoms, addedlist, block, metal_electrons, elemlist, tmpsmiles=" ", os=int(0), typ="Local", parent: object=None): - self.labels = labels - self.natoms = len(labels) - self.coords = coords - self.added_atoms = added_atoms - self.addedlist = addedlist - self.block = block - self.metal_electrons = metal_electrons - self.elemlist = elemlist - self.typ = typ - self.cov_factor = cov_factor - self.os = os - self.tmpsmiles = tmpsmiles - self.atnums = [elemdatabase.elementnr[l] for l in labels] # from xyz2mol - self.parent = parent + self.labels = labels + self.natoms = len(labels) + self.coords = coords + self.added_atoms = added_atoms + self.addedlist = addedlist + self.block = block + self.metal_electrons = metal_electrons + self.elemlist = elemlist + self.typ = typ + self.cov_factor = cov_factor + self.os = os + self.tmpsmiles = tmpsmiles + self.atnums = [elemdatabase.elementnr[l] for l in labels] # from xyz2mol + self.parent = parent self.radii = get_radii(labels) self.status, self.adjmat, self.adjnum = get_adjmatrix(self.labels, self.coords, self.cov_factor, self.radii) ####################################################### - class charge_state(object): - def __init__(self, status, uncorr_total_charge, uncorr_atom_charges, rdkit_mol, smiles, charge_tried, allow): - self.status = status - self.uncorr_total_charge = uncorr_total_charge - self.uncorr_atom_charges = uncorr_atom_charges - self.rdkit_mol = rdkit_mol - self.smiles = smiles - self.charge_tried = charge_tried - self.allow = allow + def __init__(self, status, uncorr_total_charge, uncorr_atom_charges, rdkit_mol: object, smiles: str, charge_tried: int, allow: bool, protonation: object): + self.status = status + self.uncorr_total_charge = uncorr_total_charge + self.uncorr_atom_charges = uncorr_atom_charges + self.rdkit_mol = rdkit_mol + self.smiles = smiles + self.charge_tried = charge_tried + self.allow = allow self.uncorr_abstotal, self.uncorr_abs_atcharge, self.uncorr_zwitt = eval_chargelist(uncorr_atom_charges) + self.protonation = protonation - if uncorr_total_charge == charge_tried: - self.coincide = True - else: - self.coincide = False + if uncorr_total_charge == charge_tried: self.coincide = True + else: self.coincide = False - def correction(self, addedlist, metal_electrons, elemlist): - self.addedlist = addedlist - self.metal_electrons = metal_electrons - self.elemlist = elemlist - self.corr_total_charge = int(0) - self.corr_atom_charges = [] + self.addedlist = protonation.addedlist + self.metal_electrons = protonation.metal_electrons + self.elemlist = protonation.elemlist + self.corr_total_charge = int(0) + self.corr_atom_charges = [] # Corrects the Charge of atoms with addedH count = 0 - if len(addedlist) > 0: - for idx, add in enumerate(addedlist): # Iterates over the original number of ligand atoms, thus without the added H + if len(self.addedlist) > 0: + for idx, add in enumerate(self.addedlist): # Iterates over the original number of ligand atoms, thus without the added H if add != 0: count += 1 - corrected = self.uncorr_atom_charges[idx] - addedlist[idx] + metal_electrons[idx] - self.uncorr_atom_charges[len(addedlist)-1+count] + corrected = self.uncorr_atom_charges[idx] - self.addedlist[idx] + metal_electrons[idx] - self.uncorr_atom_charges[len(self.addedlist)-1+count] self.corr_atom_charges.append(corrected) # last term corrects for cases in which a charge has been assigned to the added atom else: diff --git a/cell2mol/classes.py b/cell2mol/classes.py index 8bd69b67..6cd672b1 100644 --- a/cell2mol/classes.py +++ b/cell2mol/classes.py @@ -2,25 +2,25 @@ import sys from cell2mol.connectivity import get_adjacency_types, get_element_count, labels2electrons, labels2formula, labels2ratio, get_adjmatrix, compare_atoms, compare_species from cell2mol.connectivity import get_metal_idxs, split_species, get_radii -from cell2mol.cell_reconstruct import * +from cell2mol.cell2mol.cell_reconstruction import * from cell2mol.cell_operations import cart2frac, frac2cart_fromcellvec, frac2cart_fromparam -from cell2mol.yuri_formal_charge import * +from cell2mol.cell2mol.charge_assignment import * from cell2mol.yuri_spin import * from cell2mol.other import extract_from_list, compute_centroid, get_dist, get_angle from cell2mol.elementdata import ElementData elemdatabase = ElementData() import pickle -################################ -#### BASIS FOR CELL2MOL 2 #### -################################ +################################## +#### CLASSES FOR CELL2MOL 2 #### +################################## class specie(object): def __init__(self, labels: list, coord: list, parent_indices: list=None, radii: list=None, parent: object=None) -> None: # Sanity Checks assert len(labels) == len(coord) - if parent_indices is not None: assert len(labels) == len(parent_indices) - if radii is not None: assert len(labels) == len(radii) + if parent_indices is not None: assert len(labels) == len(parent_indices) + if radii is not None: assert len(labels) == len(radii) # Optional Information if radii is not None: self.radii = radii @@ -97,7 +97,7 @@ def reset_charge(self): a.reset_charge() ############ - def set_charges(self, totcharge: int=None, atomic_charges: list=None) -> None: + def set_charges(self, totcharge: int=None, atomic_charges: list=None, smiles: str=None, rdkit_obj: object=None) -> None: ## Sets total charge if totcharge is not None: self.totcharge = totcharge elif totcharge is None and atomic_charges is not None: self.totcharge = np.sum(atomic_charges) @@ -108,6 +108,8 @@ def set_charges(self, totcharge: int=None, atomic_charges: list=None) -> None: if not hasattr(self,"atoms"): self.set_atoms() for idx, a in enumerate(self.atoms): a.set_charge(self.atomic_charges[idx]) + if smiles is not None: self.smiles = smiles + if rdkit_obj is not None: self.rdkit_obj = rdkit_obj ############ def set_atoms(self, atomlist=None, overwrite_parent: bool=False): @@ -197,7 +199,7 @@ def get_protonation_states(self, debug: int=0): def get_possible_cs(self, debug: int=0): ## Arranges a list of possible charge_states associated with this species, which is later managed at the cell level to determine the good one if not hasattr(self,"protonation_states"): self.get_protonation_states(debug=debug) - if self.protonation_states is not None: self.possible_cs = get_poscharges(self, debug=debug) + if self.protonation_states is not None: self.possible_cs = get_possible_cs(self, debug=debug) return self.possible_cs ############ @@ -767,6 +769,69 @@ def __init__(self, name: str, labels: list, pos: list, cellvec: list, cellparam: self.cellparam = cellparam self.natoms = len(labels) + ####################################################### + def get_unique_species(self, debug: int=0): + if not hasattr(self,"is_fragmented"): self.reconstruct(debug=debug) + if self.is_fragmented: return None # Stopping. self.is_fragmented must be false to determine the charges of the cell + + self.unique_species = [] + self.unique_indices = [] + + typelist_mols = [] # temporary variable + typelist_ligs = [] # temporary variable + typelist_mets = [] # temporary variable + + specs_found = -1 + for idx, mol in enumerate(self.moleclist): + if debug >= 2: print(f"Molecule {idx} formula={mol.formula}") + if not mol.iscomplex: + found = False + for ldx, typ in enumerate(typelist_mols): # Molecules + issame = compare_species(mol, typ[0], debug=1) + if issame : + found = True ; kdx = typ[1] + if debug >= 2: print(f"Molecule {idx} is the same with {ldx} in typelist") + if not found: + specs_found += 1 ; kdx = specs_found + typelist_mols.append(list([mol, kdx])) + self.unique_species.append(mol) + if debug >= 2: print(f"New molecule found with: formula={mol.formula} and added in position {kdx}") + self.unique_indices.append(kdx) + mol.unique_index = kdx + + else: + for jdx, lig in enumerate(mol.ligands): # ligands + found = False + for ldx, typ in enumerate(typelist_ligs): + issame = compare_species(lig, typ[0], debug=1) + if issame : + found = True ; kdx = typ[1] + if debug >= 2: print(f"ligand {jdx} is the same with {ldx} in typelist") + if not found: + specs_found += 1 ; kdx = specs_found + typelist_ligs.append(list([lig, kdx])) + self.unique_species.append(lig) + if debug >= 2: print(f"New ligand found with: formula {lig.formula} and denticity={lig.denticity}, and added in position {kdx}") + self.unique_indices.append(kdx) + lig.unique_index = kdx + + for jdx, met in enumerate(mol.metals): # metals + found = False + for ldx, typ in enumerate(typelist_mets): + issame = compare_metals(met, typ[0], debug=1) + if issame : + found = True ; kdx = typ[1] + if debug >= 2: print(f"Metal {jdx} is the same with {ldx} in typelist") + if not found: + specs_found += 1 ; kdx = specs_found + typelist_mets.append(list([met, kdx])) + self.unique_species.append(met) + if debug >= 2: print(f"New Metal Center found with: labels {met.label} and added in position {kdx}") + self.unique_indices.append(kdx) + met.unique_index = kdx + + return self.unique_species + ####################################################### def get_fractional_coord(self): self.frac_coord = cart2frac(self.coord, self.cellv) @@ -875,7 +940,7 @@ def data_for_postproc(self, molecules: list, indices: list, options: list): ####################################################### def reconstruct(self, cov_factor: float=None, metal_factor: float=None, debug: int=0): - from cell2mol.cell_reconstruct import classify_fragments, fragments_reconstruct + from cell2mol.cell2mol.cell_reconstruction import classify_fragments, fragments_reconstruct if not hasattr(self,"refmoleclist"): print("CELL.RECONSTRUCT. CELL missing list of reference molecules"); return None if not hasattr(self,"moleclist"): self.get_moleclist() blocklist = self.moleclist.copy() # In principle, in moleclist now there are both fragments and molecules @@ -919,16 +984,17 @@ def assign_charges(self, debug: int=0) -> object: # Initiates variables ############ + # (0) Makes sure the cell is reconstructed if not hasattr(self,"is_fragmented"): self.reconstruct(debug=debug) if self.is_fragmented: return None # Stopping. self.is_fragmented must be false to determine the charges of the cell # (1) Indentify unique chemical species - self.speclist, self.unique_indices = identify_unique_species(self.moleclist, debug=debug) - if debug >= 1: print(f"{len(self.speclist)} Species (Metal or Ligand or Molecules) to Characterize") + if not hasattr(self,"unique_species"): self.get_unique_species(debug=debug) + if debug >= 1: print(f"{len(self.unique_species)} Species (Metal or Ligand or Molecules) to Characterize") # (2) Gets a preliminary list of possible charge states for each specie (former drive_poscharge) selected_cs = [] - for idx, spec in enumerate(self.speclist): + for idx, spec in enumerate(self.unique_species): tmp = spec.get_possible_cs(debug=debug) if tmp is None: self.error_empty_poscharges = True; return None # Empty list of possible charges received. Stopping if spec.subtype == "metal": selected_cs.append([]) ## I don't like to add an empty list @@ -936,13 +1002,13 @@ def assign_charges(self, debug: int=0) -> object: self.error_empty_poscharges = False # Finds the charge_state that satisfies that the crystal must be neutral - final_charge_distribution = balance_charge(self.unique_indices, self.speclist, debug=debug) + final_charge_distribution = balance_charge(self.unique_indices, self.unique_species, debug=debug) if len(final_charge_distribution) > 1: if debug >= 1: print("More than one Possible Distribution Found:", final_charge_distribution) self.error_multiple_distrib = True self.error_empty_distrib = False - pp_mols, pp_idx, pp_opt = prepare_unresolved(self.unique_indices, self.speclist, final_charge_distribution, debug=debug) + pp_mols, pp_idx, pp_opt = prepare_unresolved(self.unique_indices, self.unique_species, final_charge_distribution, debug=debug) self.data_for_postproc(pp_mols, pp_idx, pp_opt) return None elif len(final_charge_distribution) == 0: # @@ -956,7 +1022,7 @@ def assign_charges(self, debug: int=0) -> object: print("#########################################") print("Assigning Charges and Preparing Molecules") print("#########################################") - self.moleclist, self.error_prepare_mols = prepare_mols(self.moleclist, self.unique_indices, self.speclist, selected_cs, final_charge_distribution[0], debug=debug) + self.moleclist, self.error_prepare_mols = prepare_mols(self.moleclist, self.unique_indices, self.unique_species, selected_cs, final_charge_distribution[0], debug=debug) if self.error_prepare_mols: return None # Error while preparing molecules return self.moleclist @@ -1044,4 +1110,4 @@ def __repr__(self): to_print += f' With Formulae: \n' for idx, ref in enumerate(self.refmoleclist): to_print += f' {idx}: {ref.formula} \n' - return to_print \ No newline at end of file + return to_print diff --git a/cell2mol/formal_charge.py b/cell2mol/formal_charge.py deleted file mode 100644 index e7c0a93d..00000000 --- a/cell2mol/formal_charge.py +++ /dev/null @@ -1,1669 +0,0 @@ -#!/usr/bin/env python - -# fmt: off - -import numpy as np -import itertools -import sys -import time -from typing import Tuple -from collections import defaultdict - -from cell2mol.tmcharge_common import getradii, getconec, find_closest_metal -from cell2mol.xyz2mol import int_atom, xyz2mol -from cell2mol.missingH import getangle -from cell2mol.hungarian import reorder -from cell2mol.elementdata import ElementData -from cell2mol.connectivity import add_atom -elemdatabase = ElementData() - -from cell2mol.classes import specie, molecule, ligand, atom, metal, group, bond - -############################# -### Loads Rdkit & xyz2mol ### -############################# - -from rdkit import Chem -from rdkit.Chem.Draw.MolDrawing import DrawingOptions # Only needed if modifying defaults -DrawingOptions.bondLineWidth = 2.2 - -# IPythonConsole.ipython_useSVG = False -from rdkit import rdBase -if "ipykernel" in sys.modules: - try: - from rdkit.Chem.Draw import IPythonConsole - except ModuleNotFoundError: - pass -# print("RDKIT Version:", rdBase.rdkitVersion) -rdBase.DisableLog("rdApp.*") - - -####################################################### -def classify_mols(moleclist: list, debug: int=0) -> Tuple[list, list, list, list]: - - # This subroutine reads all molecules and - # identifies those that are identical based on a sort of ID number, which is - # the variable "elemcountvec". This is a vector of occurrences for all elements in the elementdatabase - - molec_indices = [] # molecule index - ligand_indices = [] # ligand index - unique_indices = [] # Unique Specie (molecule or ligand) Index in Typelist - unique_species = [] # List of Unique Species - typelist_mols = [] # List of ID codes for Molecules - typelist_ligs = [] # List of ID codes for Ligands - typelist_mets = [] - - # The reason for splitting typelist in two is that - # some species might appear as ligands and as counterions, (eg. Cl-) - # in the same unit cell. It is cleaner if these cases are treated separately - - # Note that both "typelists", and "unique_species" will be a list of tuples. - specs_found = -1 - # Non-Complexes - for idx, mol in enumerate(moleclist): - if not mol.iscomplex: - found = False - for ldx, typ in enumerate(typelist_mols): - if compare_species(mol,typ[0]) and not found: - found = True - kdx = typ[1] - if debug >= 2: print(f"Molecule {idx} is the same than {ldx} in typelist") - if not found: - if debug >= 2: print(f"New molecule found with: formula={mol.formula}, totmconnec={mol.totmconnec}, and added in position {kdx}") - specs_found += 1 - kdx = specs_found - typelist_mols.append(mol, kdx) - unique_species.append(mol) - - molec_indices.append(idx) - ligand_indices.append("-") - unique_indices.append(kdx) - - # Complexes - elif mol.iscomplex: - for jdx, lig in enumerate(mol.ligands): - found = False - # Ligands - for ldx, typ in enumerate(typelist_ligs): - if compare_species(lig,typ[0]) and not found: - found = True - kdx = typ[1] - if debug >= 2: print(f"Ligand {jdx} is the same than {ldx} in typelist") - if not found: - specs_found += 1 - kdx = specs_found - if debug >= 2: print(f"New ligand found with: formula {lig.formula} and totmconnec={lig.totmconnec}, and added in position {kdx}") - typelist_ligs.append(lig, kdx) - unique_species.append(lig) - - molec_indices.append(idx) - ligand_indices.append(jdx) - unique_indices.append(kdx) - - # Metals - for jdx, met in enumerate(mol.metals): - found = False - for ldx, typ in enumerate(typelist_mets): - if compare_metals(met,typ[0]) and not found: - found = True - kdx = typ[1] - if debug >= 2: print(f"Metal {jdx} is the same than {ldx} in typelist") - if not found: - specs_found += 1 - kdx = specs_found - if debug >= 2: print(f"New Metal Center found with: labels {met.label} and added in position {kdx}") - typelist_mets.append(met, kdx) - unique_species.append(met) - - molec_indices.append(idx) - ligand_indices.append(jdx) - unique_indices.append(kdx) - - if debug >= 2: print("CLASSIFY: molec_indices", molec_indices) - if debug >= 2: print("CLASSIFY: ligand_indices", ligand_indices) - if debug >= 2: print("CLASSIFY: unique_indices", unique_indices) - - nspecs = len(unique_species) - for idx in range(0, nspecs): - count = unique_indices.count(idx) - if debug >= 2: print(f"CLASSIFY: specie {idx} appears {count} times, with type: {unique_species[idx][0]}") - - return molec_indices, ligand_indices, unique_indices, unique_species - - -####################################################### -def getcharge(labels: list, pos: list, conmat: np.ndarray, ich: int, cov_factor: float=1.3, allow: bool=True, debug: int=0) -> list: - ## Generates the connectivity of a molecule given a charge. - # The molecule is described by the labels, and the atomic cartesian coordinates "pos" - # The adjacency matrix is also provided (conmat) - #:return iscorrect: boolean variable with a notion of whether the function delivered a good=True or bad=False connectivity - #:return total_charge: total charge associated with the connectivity - #:return atom_charge: atomic charge for each atom of the molecule - #:return mols: rdkit molecule object - #:return smiles: smiles representation of the molecule - - pt = Chem.GetPeriodicTable() # needed to retrieve the default valences in the 2nd and 3rd checks - natoms = len(labels) - atnums = [int_atom(label) for label in labels] # from xyz2mol - - ########################## - # xyz2mol is called here # - ########################## - # use_graph is called for a faster generation - # allow_charged_fragments is necessary for non-neutral molecules - # embed_chiral shouldn't ideally be necessary, but it runs a sanity check that improves the proposed connectivity - # use_huckel false means that the xyz2mol adjacency will be generated based on atom distances and vdw radii. - # Ideally, the adjacency matrix could be provided - - mols = xyz2mol(atnums,pos,conmat,cov_factor,charge=ich,use_graph=True,allow_charged_fragments=allow,embed_chiral=True,use_huckel=False) - if len(mols) > 1: print("WARNING: More than 1 mol received from xyz2mol for initcharge:", ich) - - # smiles is generated with rdkit - smiles = Chem.MolToSmiles(mols[0]) - - # Here, the atom charge is retrieved, and the connectivity of each atom goes through 3 checks. - # The variable iscorrect will track whether the overall generated structure is meaningful - iscorrect = True - atom_charge = [] - total_charge = 0 - for i in range(natoms): - a = mols[0].GetAtomWithIdx(i) # Returns a particular Atom - atom_charge.append(a.GetFormalCharge()) - - valence = a.GetTotalValence() # Valence of the atom in the mol object - bonds = 0 - countaromatic = 0 - for b in a.GetBonds(): # Returns a read-only sequence containing all of the molecule’s Bonds - bonds += b.GetBondTypeAsDouble() - # total number of bonds (weighted by bond order) of the atom in the mol object - # Returns the type of the bond as a double (i.e. 1.0 for SINGLE, 1.5 for AROMATIC, 2.0 for DOUBLE) - if b.GetBondTypeAsDouble() == 1.5: - countaromatic += 1 - if countaromatic % 2 != 0: - bonds -= 0.5 - - total_charge += a.GetFormalCharge() - lonepairs = (elemdatabase.valenceelectrons[a.GetSymbol()] - a.GetFormalCharge() - valence) / 2 - totalvalenceelectrons = int(bonds) + int(lonepairs) * 2 + a.GetFormalCharge() - - # Checks the quality of the resulting smiles - # First check, the number of lonepairs is computed and should make sense - if lonepairs != 0 and lonepairs != 1 and lonepairs != 2 and lonepairs != 3 and lonepairs != 4: - if debug >= 2: - print("GETCHARGE: 2nd Check-lonepairs=", i, a.GetSymbol(), lonepairs) - iscorrect = False - - # RDKIT has some troubles assigning the valence for atoms with aromatic bonds. - # So the 2nd and 3rd Check applies only for countaromatic==0 - if countaromatic == 0: - # Second check, the number of bonds should coincide with the valence. - # I know it should be the same, but in bad SMILES they often do not coincide - if bonds != valence: - if debug >= 2: - print("GETCHARGE: 1st Check-bonds/valence:",i,a.GetSymbol(),bonds,valence) - iscorrect = False - if debug >= 2: - for b in a.GetBonds(): - print(b.GetBondTypeAsDouble(),b.GetBeginAtomIdx(),b.GetEndAtomIdx()) - - # Third check, using the totalvalenceelectrons - if totalvalenceelectrons != elemdatabase.valenceelectrons[a.GetSymbol()]: - if debug >= 2: print("GETCHARGE: 3rd Check: Valence gives false for atom",i,a.GetSymbol(),"with:",totalvalenceelectrons,elemdatabase.valenceelectrons[a.GetSymbol()]) - iscorrect = False - - if debug >= 2 and i == 0: - print("ich, atom idx, label, charge, pt.GetDefaultValence(a.GetAtomicNum()), valence, num bonds, num lonepairs, iscorrect") - if debug >= 2: print(ich,i,a.GetSymbol(),a.GetFormalCharge(),pt.GetDefaultValence(a.GetAtomicNum()),valence,int(bonds),int(lonepairs),iscorrect) - - # Creates the charge_state - try: - ch_state = charge_state(iscorrect, total_charge, atom_charge, mols[0], smiles, ich, allow) - except Exception as exc: - if debug >= 1: print(f" GETCHARGE: EXCEPTION in charge_state creation: {exc}") - - return ch_state - -####################################################### -def select_charge_distr(charge_states: list, debug: int=0) -> list: - # This function selects the best charge_distribuion among the ones generated by the function "getcharge" - # It does so based, in general, on the number of charges in each connectivity. - #:return goodlist: list of acceptable charge distributions. - # goodlist contains the indices of those distributions as the enter this function - - nlists = len(charge_states) - uncorr_total = [] - uncorr_abs_total = [] - uncorr_abs_atcharge = [] - uncorr_zwitt = [] - coincide = [] - for chs in charge_states: - uncorr_total.append(chs.uncorr_total_charge) - uncorr_abs_total.append(chs.uncorr_abstotal) - uncorr_abs_atcharge.append(chs.uncorr_abs_atcharge) - uncorr_zwitt.append(chs.uncorr_zwitt) - coincide.append(chs.coincide) - - if debug >= 2: print(f" NEW SELECT FUNCTION: uncorr_total: {uncorr_total}") - if debug >= 2: print(f" NEW SELECT FUNCTION: uncorr_abs_total: {uncorr_abs_total}") - if debug >= 2: print(f" NEW SELECT FUNCTION: uncorr_abs_atcharge: {uncorr_abs_atcharge}") - if debug >= 2: print(f" NEW SELECT FUNCTION: uncorr_zwitt: {uncorr_zwitt}") - if debug >= 2: print(f" NEW SELECT FUNCTION: coincide: {coincide}") - - minoftot = np.min(uncorr_abs_total) - minofabs = np.min(uncorr_abs_atcharge) - listofmintot = [i for i, x in enumerate(uncorr_abs_total) if x == minoftot] - listofminabs = [i for i, x in enumerate(uncorr_abs_atcharge) if x == minofabs] - if debug >= 2: print(f" NEW SELECT FUNCTION: listofmintot: {listofmintot}") - if debug >= 2: print(f" NEW SELECT FUNCTION: listofminabs: {listofminabs}") - # Searches for entries that have the smallest total charge(appear in listofmintot), - # and smallest number of charges(appear in listofminabs) - - #################### - # building tmplist # - #################### - tmplist = [] - for idx in range(0, nlists): - if (idx in listofminabs) and (idx in listofmintot) and coincide[idx]: - tmplist.append(idx) - - # IF listofminabs and listofmintot do not have any value in common. Then we select from minima, coincide, and zwitt - if len(tmplist) == 0: - if debug >= 2: print(" NEW SELECT FUNCTION: No entry in initial tmplist. We now select from minima, coincide and zwitt:") - for idx in range(0, nlists): - if ((idx in listofminabs) or (idx in listofmintot)) and coincide[idx] and not uncorr_zwitt[idx]: - tmplist.append(idx) - - # IF no values yet, we relax the criterion - if len(tmplist) == 0: - if debug >= 2: print(" NEW SELECT FUNCTION: No entry in initial tmplist yet. We now select from minima and coincide:") - for idx in range(0, nlists): - if ((idx in listofminabs) or (idx in listofmintot)) and coincide[idx]: - tmplist.append(idx) - - # IF no values yet, we relax the criterion even more - if len(tmplist) == 0: - if debug >= 2: print(" NEW SELECT FUNCTION: No entry in initial tmplist yet. We now select from minima:") - for idx in range(0, nlists): - if ((idx in listofminabs) or (idx in listofmintot)): - tmplist.append(idx) - - #################### - # tmplist is built # - #################### - if debug >= 2: print(f" NEW SELECT FUNCTION: tmplist: {tmplist}, including:") - for idx in range(0, nlists): - if idx in tmplist: - if debug >= 2: print(f" NEW SELECT FUNCTION: Corr_charge={charge_states[idx].corr_total_charge}") - if debug >= 2: print(f" NEW SELECT FUNCTION: Smiles={charge_states[idx].smiles}") - - corr_charges = [] - for idx in range(0, nlists): - if idx in tmplist: - if charge_states[idx].corr_total_charge not in corr_charges: - corr_charges.append(charge_states[idx].corr_total_charge) - if debug >= 2: print(f" NEW SELECT FUNCTION: found corr_charges={corr_charges}") - - good_idx = [] - for jdx, tgt_charge in enumerate(corr_charges): - if debug >= 2: print(f" NEW SELECT FUNCTION: doing tgt_charge={tgt_charge}") - list_for_tgt_charge = [] - for idx in tmplist: - if charge_states[idx].corr_total_charge == tgt_charge: - list_for_tgt_charge.append(idx) - if debug >= 2: print(f" NEW SELECT FUNCTION: charge_state added") - - # CASE 1, IF only one distribution meets the requirement. Then it is chosen - if len(list_for_tgt_charge) == 1: - good_idx.append(list_for_tgt_charge[0]) - if debug >= 2: print(f" NEW SELECT FUNCTION: Case 1, only one entry for {tgt_charge} in tmplist") - - # CASE 2, IF more than one charge_state is found for a given final charge - elif len(list_for_tgt_charge) > 1: - good_idx.append(list_for_tgt_charge[0]) - if debug >= 2: print(f" NEW SELECT FUNCTION: Case 2, more than one entry for {tgt_charge} in tmplist. Taking first") - - return good_idx - -####################################################### -def drive_get_poscharges(unique_species: list, debug: int=0) -> Tuple[list, bool]: - - Warning = False - # For each specie, a plausible_charge_state is generated. These include the actual charge state, and the protonation it belongs to - selected_charge_states = [] - for idx, spec in enumerate(unique_species): - - spec[1].poscharge = [] - spec[1].posatcharge = [] - spec[1].posobjlist = [] - spec[1].posspin = [] - spec[1].possmiles = [] - - # Obtains charge for Organic Molecules - if debug >= 1: print("") - if spec[0] == "Other": - if debug >= 1: print(" ---------------") - if debug >= 1: print(" #### NON-Complex ####") - if debug >= 1: print(" ---------------") - if debug >= 1: print(" ", spec[1].natoms, spec[1].formula) - elif spec[0] == "Ligand": - if debug >= 1: print(" ---------------") - if debug >= 1: print(" #### Ligand ####") - if debug >= 1: print(" ---------------") - if debug >= 1: print(" ", spec[1].natoms, spec[1].formula, spec[1].totmconnec) - elif spec[0] == "Metal": - if debug >= 1: print(" ---------------") - if debug >= 1: print(" #### Metal ####") - if debug >= 1: print(" ---------------") - if debug >= 1: print(" ", spec[1].label, "coordination_sphere\t", spec[1].coord_sphere) - if debug >= 1: print(" ", spec[1].label, "coordinating_atoms\t", spec[1].coordinating_atoms) - - - # Gets possible Charges for Ligands and Other - if spec[0] == "Other" or spec[0] == "Ligand": - tmp, Warning = get_poscharges(spec, debug=debug) - if Warning: - if debug >= 1: print(f"Empty possible charges received for molecule {spec[1].formula}") - else: - if debug >= 1: print("Charge state and protonation received for molecule", len(tmp)) - selected_charge_states.append(tmp) - - # Gets possible Charges for Metals - elif spec[0] == "Metal": - met = spec[1] - mol = spec[2] - met.poscharge = get_metal_poscharge(met, mol, debug=debug) - if debug >= 1: print("Possible charges received for metal:", met.poscharge) - selected_charge_states.append([]) - - return selected_charge_states, Warning - -####################################################### -def get_list_of_charges_to_try(spec: list, prot: object, debug: int=0) -> list: - lchar = [] - - #### Educated Guess on the Maximum Charge one can expect from the spec[1] - if spec[0] == "Other": maxcharge = 3 - elif spec[0] == "Ligand": - count_non_connected_O = 0 - for a in spec[1].atoms: - if a.label == "O" and a.mconnec == 0 and a.connec == 1: - count_non_connected_O += 1 - if not spec[1].hapticity: - maxcharge = spec[1].totmconnec + count_non_connected_O - prot.added_atoms - if debug >= 2: print(f"MAXCHARGE: maxcharge set at {maxcharge} with {spec[1].totmconnec}+{count_non_connected_O}-{prot.added_atoms}") - else: - maxcharge = 2 - # Cases of same atom being connected to more than one metal - if any(a.mconnec >= 2 for a in spec[1].atoms): - pass - else: - if maxcharge > spec[1].natoms: maxcharge = spec[1].natoms - if maxcharge > 4: maxcharge = 4 - if maxcharge < 2: maxcharge = 2 - if debug >= 2: print(f"MAXCHARGE: maxcharge set at {maxcharge}") - - # Defines list of charges that will try - for magn in range(0, int(maxcharge + 1)): - if magn == 0: - signlist = [1] - elif magn != 0: - signlist = [-1, 1] - for sign in signlist: - ich = int(magn * sign) - lchar.append(ich) - return lchar - -####################################################### -def eval_chargelist(atom_charges: list, debug: int=0) -> Tuple[np.ndarray, np.ndarray, bool]: - abstotal = np.abs(np.sum(atom_charges)) - abs_atlist = [] - for a in atom_charges: - abs_atlist.append(abs(a)) - abs_atcharge = np.sum(abs_atlist) - if any(b > 0 for b in atom_charges) and any(b < 0 for b in atom_charges): - zwitt = True - else: - zwitt = False - return abstotal, abs_atcharge, zwitt - -####################################################### -def get_poscharges(spec: list, debug: int=0) -> Tuple[list, bool]: - # This function drives the detrmination of the charge for a "ligand=lig" of a given "molecule=mol" - # The whole process is done by the functions: - # 1) define_sites, which determines which atoms must have added elements (see above) - # 2) once any atom is added, the function "getcharge" generates one connectivity for a set of charges - # 3) select_charge_distr chooses the best connectivity among the generated ones. - - # Basically, this function connects these other three functions, - # while managing some key information for those - # Initiates variables - Warning = False - selected_charge_states = [] - # Finds maximum plausible charge for the specie - - ############################## - #### Creates protonation states. That is, geometries in which atoms have been added to the original molecule - ############################## - if spec[0] == "Ligand": - list_of_protonations = define_sites(spec[1], spec[2], debug) - elif spec[0] == "Other": - empty_protonation = protonation(spec[1].labels, spec[1].coord, spec[1].factor, int(0), [], [], [], [], typ="Empty") - list_of_protonations = [] - list_of_protonations.append(empty_protonation) - if debug >= 2: print(f" POSCHARGE: doing empty PROTONATION for this specie") - if debug >= 2: print(f" POSCHARGE: received {len(list_of_protonations)} protonations for this specie") - - ############################## - #### Evaluates possible charges except if the ligand is a nitrosyl - ############################## - if spec[0] == "Ligand" and spec[1].natoms == 2 and "N" in spec[1].labels and "O" in spec[1].labels: - for prot in list_of_protonations: - list_of_charge_states = [] - list_of_protonations_for_each_state = [] - chargestried = get_list_of_charges_to_try(spec, prot) - if debug >= 2: print(f" POSCHARGE will try charges {chargestried}") - - for ich in chargestried: - ch_state = getcharge(prot.labels, prot.coordinates, prot.conmat, ich, prot.factor) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) - list_of_charge_states.append(ch_state) - list_of_protonations_for_each_state.append(prot) - if debug >= 2: print(f" POSCHARGE: charge 0 with smiles {ch_state.smiles}") - - NO_type = get_nitrosyl_geom(spec[1]) - if NO_type == "Linear": - best_charge_distr_idx = [2] - elif NO_type == "Bent": - best_charge_distr_idx = [0] - - ############################# - # For all protonations, it adds the resulting states to selected_charge_states - ############################# - for idx in best_charge_distr_idx: - c = list_of_charge_states[idx] - p = list_of_protonations_for_each_state[idx] - if c.corr_total_charge not in spec[1].poscharge: - spec[1].poscharge.append(c.corr_total_charge) - spec[1].posatcharge.append(c.corr_atom_charges) - spec[1].posobjlist.append(c.rdkit_mol) - spec[1].possmiles.append(c.smiles) - # Selected_charge_states is a tuple with the actual charge state, and the protonation it belongs to - selected_charge_states.append([c,p]) - if debug >= 2: print(f" POSCHARGE. poscharge added with corrected charge: {c.corr_total_charge} and uncorrected: {c.uncorr_total_charge}") - if debug >= 2: print(f" POSCHARGE. poscharge added with smiles: {c.smiles}") - - ############################## - #### Evaluates possible charges except if the ligand is a azide (N3-) ion - ############################## - elif spec[0] == "Ligand" and spec[1].natoms == 3 and len(np.unique(spec[1].labels)) == 1 and "N" in np.unique(spec[1].labels): - poscharge = -1 - # metal-coordinating N charge +1 - # The other two N atoms charge -1 - ############################## - #### Evaluates possible charges except if the ligand is a triiodide (I3-) ion - ############################## - elif spec[0] == "Ligand" and spec[1].natoms == 3 and len(np.unique(spec[1].labels)) == 1 and "I" in np.unique(spec[1].labels): - poscharge = -1 - # metal-coordinating I charge -1 - # The other two I atoms charge 0 - - ############################## - # If not a Nitrosyl ligand, choose among the charge_states for this protonation - ############################## - else: - list_of_charge_states = [] - list_of_protonations_for_each_state = [] - for prot in list_of_protonations: - if debug >= 2: print(" ") - if debug >= 2: print(f" POSCHARGE: doing PROTONATION with added atoms: {prot.added_atoms}") - chargestried = get_list_of_charges_to_try(spec, prot) - if debug >= 2: print(f" POSCHARGE will try charges {chargestried}") - - for ich in chargestried: - try: - ch_state = getcharge(prot.labels, prot.coordinates, prot.conmat, ich, prot.factor) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) - list_of_charge_states.append(ch_state) - list_of_protonations_for_each_state.append(prot) - if debug >= 2: print(f" POSCHARGE: charge {ich} with smiles {ch_state.smiles}") - except Exception as exc: - if debug >= 1: print(f" POSCHARGE: EXCEPTION in get_poscharges: {exc}") - - ############################# - # Selects the best distribution(s) for all protonations. We retrieve the indices first, and then move the objects to the list - ############################# - if len(list_of_charge_states) > 0: - best_charge_distr_idx = select_charge_distr(list_of_charge_states, debug=debug) - else: - if debug >= 2: print(f" POSCHARGE. found EMPTY best_charge_distr_idx for PROTONATION state") - best_charge_distr_idx = [] - - ############################# - # For all protonations, it adds the resulting states to selected_charge_states - ############################# - for idx in best_charge_distr_idx: - c = list_of_charge_states[idx] - p = list_of_protonations_for_each_state[idx] - if c.corr_total_charge not in spec[1].poscharge: - spec[1].poscharge.append(c.corr_total_charge) - spec[1].posatcharge.append(c.corr_atom_charges) - spec[1].posobjlist.append(c.rdkit_mol) - spec[1].possmiles.append(c.smiles) - # Selected_charge_states is a tuple with the actual charge state, and the protonation it belongs to - selected_charge_states.append([c,p]) - if debug >= 2: print(f" POSCHARGE. poscharge added with corrected charge: {c.corr_total_charge} and uncorrected: {c.uncorr_total_charge}") - if debug >= 2: print(f" POSCHARGE. poscharge added with smiles: {c.smiles}") - - ### HERE IS HAS FINISHED WITH ALL PROTONATIONS - if len(selected_charge_states) == 0: # Means that no good option has been found - Warning = True - else: - Warning = False - - return selected_charge_states, Warning - -####################################################### -def get_nitrosyl_geom(ligand: object, debug: int=0) -> str: - # Function that determines whether the M-N-O angle of a Nitrosyl "ligand" is "Bent" or "Linear" - # Each case is treated differently - #:return NO_type: "Linear" or "Bent" - - for idx, a in enumerate(ligand.atoms): - if a.label == "N": - central = a.coord.copy() - if a.label == "O": - extreme = a.coord.copy() - - dist = [] - for idx, met in enumerate(ligand.metalatoms): - metal = np.array(met.coord) - dist.append(np.linalg.norm(central - metal)) - tgt = np.argmin(dist) - - metal = ligand.metalatoms[tgt].coord.copy() - if debug >= 2: print("NITRO coords:", central, extreme, metal) - - vector1 = np.subtract(np.array(central), np.array(extreme)) - vector2 = np.subtract(np.array(central), np.array(metal)) - if debug >= 2: print("NITRO Vectors:", vector1, vector2) - - angle = getangle(vector1, vector2) - if debug >= 2: print("NITRO ANGLE:", angle, np.degrees(angle)) - - if np.degrees(angle) > float(160): NO_type = "Linear" - else: NO_type = "Bent" - - return str(NO_type) - -####################################################### -def get_metal_poscharge(metal: object, molecule: object, debug: int=0) -> list: - ## Retrieves plausible oxidation states for a given metal - # Data Obtained from: - # Venkataraman, D.; Du, Y.; Wilson, S. R.; Hirsch, K. A.; Zhang, P.; Moore, J. S. A - # Coordination Geometry Table of the D-Block Elements and Their Ions. - # J. Chem. Educ. 1997, 74, 915. - - atnum = elemdatabase.elementnr[metal.label] - - at_charge = defaultdict(list) - # 1st-row transition metals. - at_charge[21] = [3] # Sc - at_charge[22] = [2, 3, 4] # Ti - at_charge[23] = [1, 2, 3, 4, 5] # V - at_charge[24] = [0, 2, 3] # Cr ; including 5 leads to worse results - at_charge[25] = [1, 2, 3] # Mn - at_charge[26] = [2, 3] # Fe - at_charge[27] = [1, 2, 3] # Co - at_charge[28] = [2, 3] # Ni - at_charge[29] = [1, 2] # Cu - at_charge[30] = [2] # Zn - # 2nd-row transition metals. - at_charge[39] = [3] # Y - at_charge[40] = [2, 3, 4] # Zr - at_charge[41] = [1, 3, 4, 5] # Nb - at_charge[42] = [0, 2, 4, 5, 6] # Mo - at_charge[43] = [1, 2, 3, 4, 5] # Tc - at_charge[44] = [2, 3, 4] # Ru - at_charge[45] = [1, 2, 3] # Rh - at_charge[46] = [0, 2] # Pd - at_charge[47] = [1] # Ag - at_charge[48] = [2] # Cd - # 3rd-row transition metals. - at_charge[57] = [] # La - at_charge[72] = [4] # Hf - at_charge[73] = [2, 3, 4, 5] # Ta - at_charge[74] = [0, 2, 4, 5, 6] # W - at_charge[75] = [1, 2, 3, 4, 5, 7] # Re - at_charge[76] = [2, 3, 4, 5, 6] # Os - at_charge[77] = [1, 3] # Ir - at_charge[78] = [0, 2, 4] # Pt - at_charge[79] = [1, 3] # Au - at_charge[80] = [2] # Hg - - poscharge = at_charge[atnum] - - list_of_zero_OS = ["Fe", "Ni", "Ru"] - if metal.label in list_of_zero_OS: - # In some cases, it adds 0 as possible metal charge - # -if it has CO ligands - if any((lig.natoms == 2 and "C" in lig.labels and "O" in lig.labels) for lig in molecule.ligandlist): - if int(0) not in poscharge: - poscharge.append(int(0)) - # -if it has any ligand with hapticity - if any((lig.hapticity) for lig in molecule.ligandlist): - if int(0) not in poscharge: - poscharge.append(int(0)) - - return poscharge - - -####################################################### -def prepare_unresolved(unique_indices: list, unique_species: list, distributions: list, debug: int=0): - - list_molecules = [] - list_indices = [] - list_options = [] - if debug >= 2: print("") - for idx, spec_tuple in enumerate(unique_species): - if spec_tuple[0] == "Metal": - pos = [jdx for jdx, uni in enumerate(unique_indices) if uni == idx] - if debug >= 2: print(f"UNRESOLVED: found metal in positions={pos} of the distribution") - values = [] - for distr in distributions: - values.append(distr[pos[0]]) - options = list(set(values)) - if debug >= 2: print(f"UNRESOLVED: list of values={values}\n") - if debug >= 2: print(f"UNRESOLVED: options={options}\n") - - if len(options) > 1: - list_molecules.append(spec_tuple[2]) - list_indices.append(spec_tuple[1].atlist) - list_options.append(options) - - return list_molecules, list_indices, list_options - -####################################################### -def balance_charge(unique_indices: list, unique_species: list, debug: int=0) -> list: - - # Function to Select the Best Charge Distribution for the unique species. - # It accepts multiple charge options for each molecule/ligand/metal (poscharge, etc...). - # NO: It should select the best one depending on whether the final metal charge makes sense or not. - # In some cases, can accept metal oxidation state = 0, if no other makes sense - - iserror = False - iterlist = [] - for idx, spec_tuple in enumerate(unique_species): - spec = spec_tuple[1] - toadd = [] - if len(spec.poscharge) == 1: - toadd.append(spec.poscharge[0]) - elif len(spec.poscharge) > 1: - for tch in spec.poscharge: - toadd.append(tch) - elif len(spec.poscharge) == 0: - iserror = True - toadd.append("-") - iterlist.append(toadd) - - if debug >= 2: print("BALANCE: iterlist", iterlist) - if debug >= 2: print("BALANCE: unique_indices", unique_indices) - - if not iserror: - tmpdistr = list(itertools.product(*iterlist)) - if debug >= 2: print("BALANCE: tmpdistr", tmpdistr) - - # Expands tmpdistr to include same species, generating alldistr: - alldistr = [] - for distr in tmpdistr: - tmp = [] - for u in unique_indices: - tmp.append(distr[u]) - alldistr.append(tmp) - if debug >= 2: print("BALANCE: alldistr added:", tmp) - - final_charge_distribution = [] - for idx, d in enumerate(alldistr): - final_charge = np.sum(d) - if final_charge == 0: - final_charge_distribution.append(d) - elif iserror: - if debug >= 1: print("Error found in BALANCE: one species has no possible charges") - final_charge_distribution = [] - - return final_charge_distribution - -####################################################### -def define_sites(ligand: object, molecule: object, debug: int=0) -> list: - # This function is the heart of the program. - # It decides whether a connected atom must be temporarily added an extra element - # when running the getcharge function - # It does so in order to generate a meaningful connectivity for the ligand, - # without any additional difficulty due to the missing M-L bond(s) - - # The function works on all "groups" of a ligand, - # which are groups of atoms that are all connected among themselves and to the metal - # For instance, in a Cp ligand, all C atoms belong to the same group. - # Also, for the ligand CN, C is the only group - - # Groups are identified in the part of the code that does the cell reconstruction, and have properties. - # One of this properties is whether they form haptic bonds. - # Haptic groups require a catalogue of rules, since they behave differently from regular groups (i.e. non-haptic) - # These rules are defined below. - - # If a group is not haptic. Then rules are applied depending on the type of the connected atom. - # Ideally, these rules can be defined depending on the adjacency of the atom. - # In some cases, this is not the case, and so we resort to an actual connectivity, - # in what I call a non-local approach. - - # The non-local approach consists of generating a tentative connectivity using xyz2mol, assuming a neutral charge. - # This is not necessarily true, but hopefully the connectivity of the connected atom will be correct. - - list_of_protonations = [] - - natoms = len(ligand.labels) - newlab = ligand.labels.copy() - newcoord = ligand.coord.copy() - - # Variables that control how many atoms have been added. - tmp_added_atoms = 0 - added_atoms = 0 - - # Boolean that decides whether a non-local approach is needed - non_local_groups = 0 - needs_nonlocal = False - - # Initialization of the variables - addedlist = np.zeros((natoms)).astype(int) - block = np.zeros((natoms)).astype(int) - metal_electrons = np.zeros((natoms)).astype(int) # It will remain as such - elemlist = np.empty((natoms)).astype(str) - - # Program runs sequentially for each group of the ligand - for g in ligand.grouplist: - - ######################## - # Cases with Hapticity # - ######################## - if len(g.hapttype) > 0: # then the group has hapticity - Selected_Hapticity = False - if debug >= 2: print(" DEFINE_SITES: addressing group with hapticity:", g.hapttype) - - if "h5-Cp" in g.hapttype and not Selected_Hapticity: - if debug >= 2: print(" DEFINE_SITES: It is an h5-Cp with atlist:", g.atlist) - Selected_Hapticity = True - tobeadded = 1 - tmp_added_atoms = 0 - for idx, a in enumerate(ligand.atoms): - # print(idx, a.label, a.mconnec) - if idx in g.atlist and a.mconnec == 1: - if tmp_added_atoms < tobeadded: - elemlist[idx] = "H" - addedlist[idx] = 1 - tmp_added_atoms += 1 - # print("added") - else: - block[idx] = 1 - - elif "h7-Cicloheptatrienyl" in g.hapttype and not Selected_Hapticity: - if debug >= 2: print(" DEFINE_SITES: It is an h7-Cicloheptatrienyl") - Selected_Hapticity = True - tobeadded = 1 - tmp_added_atoms = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - if tmp_added_atoms < tobeadded: - elemlist[idx] = "H" - addedlist[idx] = 1 - tmp_added_atoms += 1 - else: - block[idx] = 1 - - elif "h5-AsCp" in g.hapttype and not Selected_Hapticity: - if debug >= 2: print(" DEFINE_SITES: It is an h5-AsCp") - Selected_Hapticity = True - - # Rules change depending on whether the ring is substituted or not - issubstituted = False - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - for jdx in a.adjacency: - if ligand.labels[jdx] != "As": - issubstituted = True - if issubstituted: - tobeadded = 0 - else: - tobeadded = 1 - - tmp_added_atoms = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - if tmp_added_atoms < tobeadded: - elemlist[idx] = "H" - addedlist[idx] = 1 - tmp_added_atoms += 1 - else: - block[idx] = 1 - - elif "h5-Pentaphosphole" in g.hapttype and not Selected_Hapticity: ## Case of IMUCAX - if debug >= 2: print(" DEFINE_SITES: It is an h5-Pentaphosphole") - Selected_Hapticity = True - - # Rules change depending on whether the ring is substituted or not - issubstituted = False - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - for jdx in a.adjacency: - if ligand.labels[jdx] != "P": - issubstituted = True - if issubstituted: - tobeadded = 0 - else: - tobeadded = 1 - - tmp_added_atoms = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - if tmp_added_atoms < tobeadded: - elemlist[idx] = "H" - addedlist[idx] = 1 - tmp_added_atoms += 1 - else: - block[idx] = 1 - - elif ("h3-Allyl" in g.hapttype or "h3-Cp" in g.hapttype) and not Selected_Hapticity: - # if "h3-Allyl" or "h3-Cp" in g.hapttype: - if debug >= 2: print(" DEFINE_SITES: It is either an h3-Allyl or an h3-Cp") - Selected_Hapticity = True - tobeadded = 1 - tmp_added_atoms = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - if tmp_added_atoms < tobeadded: - elemlist[idx] = "H" - addedlist[idx] = 1 - tmp_added_atoms += 1 - else: - block[idx] = 1 - - elif ("h4-Benzene" in g.hapttype or "h4-Butadiene" in g.hapttype) and not Selected_Hapticity: - if debug >= 2: print(" DEFINE_SITES: It is either an h4-Benzene or an h4-Butadiene") - if debug >= 2: print(" DEFINE_SITES: No action is required") - Selected_Hapticity = True - tobeadded = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - block[idx] = 1 - - elif ("h2-Benzene" in g.hapttype or "h2-Butadiene" or "h2-ethylene" in g.hapttype) and not Selected_Hapticity: - if debug >= 2: print(" DEFINE_SITES: It is either an h2-Benzene, an h2-ethylene or an h2-Butadiene") - if debug >= 2: print(" DEFINE_SITES: No action is required") - Selected_Hapticity = True - tobeadded = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - block[idx] = 1 - - elif "h4-Enone" in g.hapttype and not Selected_Hapticity: - if debug >= 2: print(" DEFINE_SITES: It is an h4-Enone") - if debug >= 2: print(" DEFINE_SITES: No action is required") - Selected_Hapticity = True - tobeadded = 0 - for idx, a in enumerate(ligand.atoms): - if idx in g.atlist and a.mconnec == 1: - block[idx] = 1 - - # If the group hapticity type is not recognized -or instructions are not defined-, nothing is done - if not Selected_Hapticity: - if debug >= 2: print(f" DEFINE_SITES: {g.hapttype} not recognized or new rules are necessary") - - else: # cases without hapticity - ions = ["F", "Cl","Br","I","As"] # Atoms for which an H atom is always added - ########################### - # Cases with No Hapticity # - ########################### - # An initial attempt to add elements based on the adjacency of the connected atom - for idx in g.atlist: - a = ligand.atoms[idx] - if debug >= 2: print(f" DEFINE_SITES: evaluating non-haptic group with index {idx} and label {a.label}") - # Simple Ionic Case - if a.label in ions: - if a.connec == 0: - elemlist[idx] = "H" - addedlist[idx] = 1 - elif a.connec >= 1: - block[idx] = 1 - # Oxigen - elif a.label == "O" or a.label == "S" or a.label == "Se": - if a.connec == 1: - needs_nonlocal = True - non_local_groups += 1 - if debug >= 2: print(f" DEFINE_SITES: will be sent to nonlocal due to {a.label} atom") - elif a.connec > 1: - block[idx] = 1 - # SERGI: I'm considering a different one with S and Se - # elif a.label == "S" or a.label == "Se": - # if a.connec == 1: - # elemlist[idx] = "H" - # addedlist[idx] = 1 - # Hydrides - elif a.label == "H": - if a.connec == 0: - elemlist[idx] = "Cl" - addedlist[idx] = 1 - else: - block[idx] = 1 - # Nitrogen - elif a.label == "N": - # Nitrosyl - if ligand.natoms == 2 and "O" in ligand.labels: - NO_type = get_nitrosyl_geom(ligand) - if NO_type == "Linear": - if debug >= 2: print(" DEFINE_SITES: Found Linear Nitrosyl") - elemlist[idx] = "O" - addedlist[idx] = 2 - metal_electrons[idx] = 1 - elif NO_type == "Bent": - if debug >= 2: print(" DEFINE_SITES: Found Bent Nitrosyl") - elemlist[idx] = "H" - addedlist[idx] = 1 - else: - # nitrogen with at least 3 adjacencies doesnt need H - if a.connec >= 3: - block[idx] = 1 - else: - # Checks for adjacent Atoms - list_of_adj_atoms = [] - for i in a.adjacency: - list_of_adj_atoms.append(ligand.labels[i]) - numN = list_of_adj_atoms.count("N") - if numN == 2: # triazole or tetrazole - elemlist[idx] = "H" - addedlist[idx] = 1 - else: - needs_nonlocal = True - non_local_groups += 1 - if debug >= 2: print(f" DEFINE_SITES: will be sent to nonlocal due to {a.label} atom") - # Phosphorous - elif (a.connec == 3) and a.label == "P": - block[idx] = 1 - # Case of Carbon (Simple CX vs. Carbenes) - elif a.label == "C": - if ligand.natoms == 2: - # CN - if "N" in ligand.labels: - elemlist[idx] = "H" - addedlist[idx] = 1 - # CO - if "O" in ligand.labels: - block[idx] = 1 - # Added for amides - elif (any(ligand.labels[i] == "O" for i in a.adjacency) and any(ligand.labels[i] == "N" for i in a.adjacency) and a.connec == 2 ): - elemlist[idx] = "H" - addedlist[idx] = 1 - else: - iscarbene, tmp_element, tmp_added, tmp_metal = check_carbenes(a, ligand, molecule) - if debug >= 2: print(f" DEFINE_SITES: Evaluating as carbene and {iscarbene}") - if iscarbene: - # Carbene identified - elemlist[idx] = tmp_element - addedlist[idx] = tmp_added - metal_electrons[idx] = tmp_metal - else: - needs_nonlocal = True - non_local_groups += 1 - if debug >= 2: print(f" DEFINE_SITES: will be sent to nonlocal due to {a.label} atom") - # Silicon - elif a.label == "Si": - if a.connec < 4: - elemlist[idx] = "H" - addedlist[idx] = 1 - else: - block[idx] - # Boron - elif a.label == "B": - if a.connec < 4: - elemlist[idx] = "H" - addedlist[idx] = 1 - else: - block[idx] - # None of the previous options - else: - if not needs_nonlocal: - needs_nonlocal = True - non_local_groups += 1 - if debug >= 2: print(f" DEFINE_SITES: will be sent to nonlocal due to {a.label} atom with no rules") - - # If, at this stage, we have found that any atom must be added, this is done before entering the non_local part. - # The block variable makes that more atoms cannot be added to these connected atoms - for idx, a in enumerate(ligand.atoms): - if addedlist[idx] != 0 and block[idx] == 0: - isadded, newlab, newcoord = add_atom(newlab, newcoord, idx, ligand, molecule.metalist, elemlist[idx]) - if isadded: - added_atoms += addedlist[idx] - block[idx] = 1 # No more elements will be added to those atoms - if debug >= 2: print(f" DEFINE_SITES: Added {elemlist[idx]} to atom {idx} with: a.mconnec={a.mconnec} and label={a.label}") - else: - addedlist[idx] = 0 - block[idx] = 1 # No more elements will be added to those atoms - - ############################ - ###### NON-LOCAL PART ###### - ############################ - - if not needs_nonlocal: - new_prot = protonation(newlab, newcoord, ligand.factor, added_atoms, addedlist, block, metal_electrons, elemlist) - list_of_protonations.append(new_prot) - else: - # Generate the new adjacency matrix after local elements have been added to be sent to xyz2mol - local_labels = newlab.copy() - local_coords = newcoord.copy() - local_radii = getradii(local_labels) - local_natoms = len(local_labels) - local_atnums = [int_atom(label) for label in local_labels] # from xyz2mol.py - dummy, local_conmat, local_connec, dummy, dummy = getconec(local_labels, local_coords, ligand.factor, local_radii) - - local_addedlist = addedlist.copy() - local_block = block.copy() - local_added_atoms = added_atoms - - # Initiate variables - avoid = ["Si", "P"] - - if debug >= 2: print(" ") - if debug >= 2: print(f" DEFINE_SITES: Enters non-local with:") - if debug >= 2: print(f" DEFINE_SITES: block: {block}") - if debug >= 2: print(f" DEFINE_SITES: addedlist: {addedlist}") - if debug >= 2: print(f" DEFINE_SITES: {non_local_groups} non_local_groups groups found") - - # CREATES ALL COMBINATIONS OF PROTONATION STATES# - # Creates [0,1] tuples for each non_local protonation site - tmp = [] - for kdx in range(0,non_local_groups): - tmp.append([0,1]) - - if len(tmp) > 1: - combinations = list(itertools.product(*tmp)) - combinations.sort(key=sum) - else: - combinations = [0,1] - - for com in combinations: - newlab = local_labels.copy() - newcoord = local_coords.copy() - if debug >= 2: print(f" ") - if debug >= 2: print(f" DEFINE_SITES: doing combination {com}") - metal_electrons = np.zeros((local_natoms)).astype(int) ## Electrons Contributed to the Metal - elemlist = np.empty((local_natoms)).astype(str) - # block and addedlist are inherited from LOCAL - addedlist = local_addedlist.copy() - block = local_block.copy() - added_atoms = local_added_atoms - non_local_added_atoms = 0 - - os = np.sum(com) - toallocate = int(0) - for jdx, a in enumerate(ligand.atoms): - if a.mconnec >= 1 and a.label not in avoid and block[jdx] == 0: - if non_local_groups > 1: - if com[toallocate] == 1: - elemlist[jdx] = "H" - addedlist[jdx] = 1 - isadded, newlab, newcoord = add_atom(newlab, newcoord, jdx, ligand, molecule.metalist, elemlist[jdx]) - if isadded: - added_atoms += addedlist[jdx] - if debug >= 2: print(f" DEFINE_SITES: Added {elemlist[jdx]} to atom {jdx} with: a.mconnec={a.mconnec} and label={a.label}") - else: - addedlist[idx] = 0 - block[idx] = 1 # No more elements will be added to those atoms - elif non_local_groups == 1: - if com == 1: - elemlist[jdx] = "H" - addedlist[jdx] = 1 - isadded, newlab, newcoord = add_atom(newlab, newcoord, jdx, ligand, molecule.metalist, elemlist[jdx]) - if isadded: - added_atoms += addedlist[jdx] - if debug >= 2: print(f" DEFINE_SITES: Added {elemlist[jdx]} to atom {jdx} with: a.mconnec={a.mconnec} and label={a.label}") - else: - addedlist[idx] = 0 - block[idx] = 1 # No more elements will be added to those atoms - #in any case, moves index - toallocate += 1 - - smi = " " - - new_prot = protonation(newlab, newcoord, ligand.factor, added_atoms, addedlist, block, metal_electrons, elemlist, smi, os, typ="Non-local") - if new_prot.status == 1 and new_prot.added_atoms == os+local_added_atoms: - list_of_protonations.append(new_prot) - if debug >= 2: print(f" DEFINE_SITES: Protonation SAVED with {added_atoms} atoms added to ligand. status={new_prot.status}") - else: - if debug >= 2: print(f" DEFINE_SITES: Protonation DISCARDED. Steric Clashes found when adding atoms. status={new_prot.status}") - - return list_of_protonations - -####################################################### -def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, selected_charge_states: list, final_charge_distribution: list, debug: int=0) -> Tuple[list, bool]: - # The charge and connectivity of a given specie in the unit cell is only determined for one representative case. i - # For instance, if four molecules "A" are in the unit cell, only one is evaluated in the rest of the code. - # This function ensures that all other "A" molecules in the unit cell end up having the same interpretation (charge and connectivity). - # In some cases, this might be a difficult job, since the connectivity (i.e. Lewis Structure) often depends on the atom ordering, which might change - # Thus, an Hungarian ordering is implemented. - - Warning = False - idxtoallocate = 0 - # prints SUMMARY at start - if debug >= 1: - print(f"PREPARE: {len(selected_charge_states)} selected_charge_states received") - print("") - print(f"PREPARE: {len(moleclist)} molecules to prepare, of types") - for idx, mol in enumerate(moleclist): - print(f"PREPARE: Molecule {idx} is a {mol.type} with formula {mol.formula}"), - print("") - - for idx, sel in enumerate(selected_charge_states): - for jdx, opt in enumerate(sel): - chstate = opt[0] - prot = opt[1] - if debug >= 2: print(f"PREPARE: State {idx} and option {jdx}. Target state and protonation received with {chstate.corr_total_charge} and {prot.added_atoms}") - - for idx, mol in enumerate(moleclist): - if debug >= 2: print("") - if debug >= 2: print(f"PREPARE: Molecule {idx} is a {mol.type} with formula {mol.formula}"), - - ################################### - ### FOR SOLVENT AND COUNTERIONS ### - ################################### - if mol.type == "Other": - specie = unique_indices[idxtoallocate] - spec_object = unique_species[specie][1] - if debug >= 2: print(f"PREPARE: Specie number={specie} with molecule poscharges: {spec_object.poscharge}") - if debug >= 2: print(f"PREPARE: Doing molecule {idx} with idxtoallocate: {idxtoallocate}") - - allocated = False - for jdx, ch in enumerate(spec_object.poscharge): - if final_charge_distribution[idxtoallocate] == ch and not allocated: # If the charge in poscharges coincides with the one for this entry in final_distribution - - tgt_charge_state = selected_charge_states[specie][jdx][0] - tgt_protonation = selected_charge_states[specie][jdx][1] - if debug >= 2: print(f"PREPARE: target state and protonation loaded, with {tgt_charge_state.corr_total_charge} and {tgt_protonation.added_atoms}") - allocated = True - ch_state = getcharge(mol.labels, mol.coord, mol.conmat, ch, mol.factor) - ch_state.correction(tgt_protonation.addedlist, tgt_protonation.metal_electrons, tgt_protonation.elemlist) - - if ch_state.corr_total_charge == tgt_charge_state.corr_total_charge: - mol.charge(ch_state.corr_atom_charges, ch_state.corr_total_charge, ch_state.rdkit_mol, ch_state.smiles) - if debug >= 2: print(f"PREPARE: Success doing molecule {idx}. Created Charge State with total_charge={ch_state.corr_total_charge}") - else: - if debug >= 2: print(f"PREPARE: Error doing molecule {idx}. Created Charge State is different than Target") - Warning = True - - if allocated: - idxtoallocate += 1 - else: - idxtoallocate += 1 - if debug >= 2: print(f"PREPARE: Warning allocating molecule {idx} with {final_charge_distribution[idxtoallocate]} as target charge") - Warning = True - - ########################### - ###### FOR LIGANDS ###### - ########################### - elif mol.type == "Complex": - if debug >= 2: print(f"PREPARE: Molecule {moleclist.index(mol)} has {len(mol.ligandlist)} ligands") - - for kdx, lig in enumerate(mol.ligandlist): - #while not Warning: - #try: - allocated = False - specie = unique_indices[idxtoallocate] - spec_object = unique_species[specie][1] - if debug >= 2: print("") - if debug >= 2: print(f"PREPARE: Ligand {kdx}, formula: {lig.formula} is specie {specie}") - if debug >= 2: print(f"PREPARE: Ligand poscharges: {spec_object.poscharge}") - if debug >= 2: print(f"PREPARE: Doing ligand {kdx} with idxtoallocate {idxtoallocate}") - - if lig.natoms == 2 and "N" in lig.labels and "O" in lig.labels: - isnitrosyl = True - else: - isnitrosyl = False - - for jdx, ch in enumerate(spec_object.poscharge): - if debug >= 2: print(f"PREPARE: Doing {ch} of options {spec_object.poscharge}. jdx={jdx}") - tgt_charge_state = selected_charge_states[specie][jdx][0] - tgt_protonation = selected_charge_states[specie][jdx][1] - if debug >= 2: print(f"PREPARE: Found Target Prot State with {tgt_protonation.added_atoms} added atoms and {tgt_protonation.os} OS") - if debug >= 2: print(f"PREPARE: addedlist of Target Prot State: {tgt_protonation.addedlist}") - - if final_charge_distribution[idxtoallocate] == ch and not allocated: - allocated = True - - # RE-RUNS the Charge assignation for same-type molecules in the cell - list_of_protonations = define_sites(lig, mol, debug=1) - found_prot = False - - # Hungarian sort - issorted = False - if not lig.hapticity: - tini_hun = time.time() - - # Adding connectivity data to labels to improve the hungarian sort - ligand_data = [] - ref_data = [] - for a in lig.atoms: - ligand_data.append(a.label+str(a.mconnec)) - for a in spec_object.atoms: - ref_data.append(a.label+str(a.mconnec)) - dummy1, dummy2, map12 = reorder(ref_data, ligand_data, spec_object.coord, lig.coord) - - issorted = True - tend_hun = time.time() - ############### - - for p in list_of_protonations: - if debug >= 2: print(f"PREPARE: evaluating prot state with added_atoms={p.added_atoms}")#, addedlist={p.addedlist}") - if p.os == tgt_protonation.os and p.added_atoms == tgt_protonation.added_atoms and not found_prot: - if issorted: - tmp_addedlist = list(np.array(p.addedlist)[map12]) - else: - tmp_addedlist = p.addedlist - if debug >= 2: print(f"PREPARE: tmp_addedlist={tmp_addedlist}") - if all(tmp_addedlist[idx] == tgt_protonation.addedlist[idx] for idx in range(len(p.addedlist))): - if debug >= 2: print(f"PREPARE: found match in protonation with tmpsmiles:{p.tmpsmiles}") - prot = p - found_prot = True - - #### Evaluates possible charges except if the ligand is a nitrosyl - if found_prot: - if isnitrosyl: - NO_type = get_nitrosyl_geom(lig) - if NO_type == "Linear": NOcharge = 1 #NOcharge is the charge with which I need to run getcharge to make it work - if NO_type == "Bent": NOcharge = 0 - - ch_state = getcharge(prot.labels, prot.coordinates, prot.conmat, NOcharge, prot.factor) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) - - if debug >= 2: print(f"PREPARE: Found Nitrosyl of type= {NO_type}") - if debug >= 2: print(f"PREPARE: Wanted charge {ch}, obtained: {ch_state.corr_total_charge}") - if debug >= 2: print(f"PREPARE: smiles: {ch_state.smiles}") - else: - if debug >= 2: print(f"PREPARE: Sending getcharge with prot.added_atoms={prot.added_atoms} to obtain charge {ch}") - ch_state = getcharge(prot.labels, prot.coordinates, prot.conmat, ch+prot.added_atoms, prot.factor, tgt_charge_state.allow) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) - - if debug >= 2: print(f"PREPARE: Wanted charge {ch}, obtained: {ch_state.corr_total_charge}") - if debug >= 2: print(f"PREPARE: smiles: {ch_state.smiles}") - - if ch_state.corr_total_charge != ch: - if debug >= 1: print(f"PREPARE: WARNING: total charge obtained without correction {ch_state.corr_total_charge} while it should be {ch}") - Warning = True - else: - lig.charge(ch_state.corr_atom_charges, ch_state.corr_total_charge, ch_state.rdkit_mol, ch_state.smiles) - if debug >= 1: print(f"PREPARE: Success doing ligand {kdx}. Created Charge State with total_charge={ch_state.corr_total_charge}") - - else: - if debug >= 1: print(f"PREPARE: WARNING, I Could not identify the protonation state. I'll try to obtain the desired result") - found_charge_state = False - for prot in list_of_protonations: - list_of_charge_states = [] - list_of_protonations_for_each_state = [] - - tmpobject = ["Ligand", lig, mol] - chargestried = get_list_of_charges_to_try(tmpobject, prot) - for ich in chargestried: - ch_state = getcharge(prot.labels, prot.coordinates, prot.conmat, ich, prot.factor) - ch_state.correction(prot.addedlist, prot.metal_electrons, prot.elemlist) - list_of_charge_states.append(ch_state) - list_of_protonations_for_each_state.append(prot) - if debug >= 1: print(f" POSCHARGE: charge 0 with smiles {ch_state.smiles}") - - if len(list_of_charge_states) > 0: - best_charge_distr_idx = select_charge_distr(list_of_charge_states, debug=debug) - else: - if debug >= 1: print(f" POSCHARGE. found EMPTY best_charge_distr_idx for PROTONATION state") - best_charge_distr_idx = [] - - if debug >= 2: print(f" POSCHARGE. best_charge_distr_idx={best_charge_distr_idx}") - for idx in best_charge_distr_idx: - c = list_of_charge_states[idx] - p = list_of_protonations_for_each_state[idx] - if debug >= 2: print(f" POSCHARGE. {c.corr_total_charge}={ch}, {p.added_atoms}={tgt_protonation.added_atoms}") - if c.corr_total_charge == ch and p.added_atoms == tgt_protonation.added_atoms: - lig.charge(c.corr_atom_charges, c.corr_total_charge, c.rdkit_mol, c.smiles) - if debug >= 1: print(f"PREPARE: Success doing ligand {kdx}. Created Charge State with total_charge={c.corr_total_charge}") - found_charge_state = True - - if not found_charge_state: Warning = True - if allocated: - idxtoallocate += 1 - else: - idxtoallocate += 1 - if debug >= 1: print(f"PREPARE: Warning allocating molecule {idx} with {final_charge_distribution[idxtoallocate]} as target charge") - Warning = True - - for kdx, met in enumerate(mol.metalist): - specie = unique_indices[idxtoallocate] - spec_object = unique_species[specie][1] - allocated = False - if debug >= 2: print("") - if debug >= 2: print(f"PREPARE: Metal {kdx}, label {met.label} is specie {specie}") - if debug >= 2: print(f"PREPARE: Metal poscharges: {spec_object.poscharge}") - for jdx, ch in enumerate(spec_object.poscharge): - if final_charge_distribution[idxtoallocate] == ch and not allocated: - allocated = True - met.charge(ch) - if allocated: - idxtoallocate += 1 - - if not Warning: - # Now builds the Charge Data for the final molecule. Smiles is a list with all ligand smiles separately. - if debug >= 2: print(f"PREPARE: Building Molecule {idx} From Ligand&Metal Information") - tmp_atcharge = np.zeros((mol.natoms)) - tmp_smiles = [] - for lig in mol.ligandlist: - tmp_smiles.append(lig.smiles) - for kdx, a in enumerate(lig.atlist): - tmp_atcharge[a] = lig.atcharge[kdx] - for met in mol.metalist: - tmp_atcharge[met.atlist] = met.totcharge - - mol.charge(tmp_atcharge, int(sum(tmp_atcharge)), [], tmp_smiles) - - return moleclist, Warning - -####################################################### -def build_bonds(moleclist: list, debug: int=0) -> list: - ## Builds bond data for all molecules - ## Now that charges are known, we use the rdkit-objects with the correct charge to do that - ## Bond entries are defined in the mol and lig objects - - ####### - # First Part. Creates Bonds for Non-Complex Molecules - ####### - if debug >= 2: print("") - if debug >= 2: print("BUILD_BONDS: Doing 1st Part") - if debug >= 2: print("###########################") - for mol in moleclist: - if not mol.iscomplex: - if debug >= 2: print(f"BUILD BONDS: doing mol with mol.natoms={mol.natoms}") - # Checks that the gmol and rdkit-mol objects have same order - for idx, a in enumerate(mol.atoms): - - # Security Check. Confirms that the labels are the same - #if debug >= 2: print("BUILD BONDS: atom", idx, a.label) - rdkitatom = mol.rdkit_mol.GetAtomWithIdx(idx) - tmp = rdkitatom.GetSymbol() - if a.label != tmp: print("Error in Build_Bonds. Atom labels do not coincide. GMOL vs. MOL:",a.label,tmp) - else: - # First part. Creates bond information - for b in rdkitatom.GetBonds(): - bond_startatom = b.GetBeginAtomIdx() - bond_endatom = b.GetEndAtomIdx() - bond_order = b.GetBondTypeAsDouble() - if mol.atoms[bond_endatom].label != mol.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol(): - if debug >= 1: print("Error with Bond EndAtom",mol.atoms[bond_endatom].label,mol.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol()) - else: - if bond_endatom == idx: - start = bond_endatom - end = bond_startatom - elif bond_startatom == idx: - start = bond_startatom - end = bond_endatom - new_bond = bond(mol.atoms[start], mol.atoms[end], bond_order) ## This has changed. Now there is a bond object, and we send the atom objects, not only the index - a.add_bond(new_bond) - - if debug >= 2: print("") - if debug >= 2: print("BUILD_BONDS: Doing 2nd Part") - if debug >= 2: print("###########################") - - ####### - # 2nd Part. Creates Ligand Information - ####### - for mol in moleclist: - if debug >= 2: print(f"BUILD BONDS: doing mol {mol.formula} with Natoms {mol.natoms}") - if mol.iscomplex: - for lig in mol.ligands: - if debug >= 2: print(f"BUILD BONDS: doing ligand {lig.formula}") - - for idx, a in enumerate(lig.atoms): - # Security Check. Confirms that the labels are the same - rdkitatom = lig.rdkit_mol.GetAtomWithIdx(idx) - tmp = rdkitatom.GetSymbol() - if a.label != tmp: print(f"Error in Build_Bonds. Atom labels do not coincide. GMOL vs. MOL: {a.label} {tmp}") - else: - # First part. Creates bond information - for b in rdkitatom.GetBonds(): - bond_startatom = b.GetBeginAtomIdx() - bond_endatom = b.GetEndAtomIdx() - bond_order = b.GetBondTypeAsDouble() - if bond_startatom >= lig.natoms or bond_endatom >= lig.natoms: - continue - else: - if lig.atoms[bond_endatom].label != lig.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol(): - if debug >= 1: print( "Error with Bond EndAtom",lig.atoms[bond_endatom].label,lig.rdkit_mol.GetAtomWithIdx(bond_endatom).GetSymbol()) - else: - if bond_endatom == idx: - start = bond_endatom - end = bond_startatom - elif bond_startatom == idx: - start = bond_startatom - end = bond_endatom - new_bond = bond(mol.atoms[start], mol.atoms[end], bond_order) ## This has changed. Now there is a bond object, and we send the atom objects, not only the index - a.add_bond(new_bond) - - if debug >= 2: print("") - if debug >= 2: print("BUILD_BONDS: Doing 3rd Part") - if debug >= 2: print("###########################") - - ## SERGI: I need to work on that, but I'll do something first - - ######## - ## 3rd Part. Merges Ligand Information into Molecule Object using the atlists - ######## - #for mol in moleclist: - # if debug >= 2: print("BUILD BONDS: doing mol", mol.formula, "with Natoms", mol.natoms) - # if mol.iscomplex: - # allstarts = [] - # allends = [] - # allorders = [] - # # Adds atoms within ligands - # for lig in mol.ligandlist: - # for a in lig.atoms: - # for b in a.bond: - # allstarts.append(lig.atlist[b[0]]) - # allends.append(lig.atlist[b[1]]) - # allorders.append(b[2]) - - # # Adds Metal-Ligand Bonds, with an arbitrary 0.5 order: - # for idx, row in enumerate(mol.mconmat): - # # if debug >= 2: print(row) - # for jdx, val in enumerate(row): - # if val > 0: - # # if debug >= 2: print(idx, jdx, val) - # allstarts.append(idx) - # allends.append(jdx) - # allorders.append(0.5) - - # # I sould work to add Metal-Metal Bonds. Would need to work on the Metal class: - # # Finally, puts everything together, and creates bonds for MOLECULE atom objects - # for idx, a in enumerate(mol.atoms): - # starts = [] - # ends = [] - # orders = [] - # group = [] - # for entry in zip(allstarts, allends, allorders): - # if entry[0] == idx or entry[1] == idx: - # if entry not in group and (entry[1], entry[0], entry[2]) not in group: - # starts.append(entry[0]) - # ends.append(entry[1]) - # orders.append(entry[2]) - # group.append(entry) - - # a.bonds(starts, ends, orders) - - ####### - # 4th Part. Corrects Ligand Smiles to Remove Added H atoms, the old smiles is stored in "lig.smiles_with_H" as it can still be useful - ####### - for mol in moleclist: - if debug >= 2: print("BUILD BONDS: doing mol", mol.formula, "with Natoms", mol.natoms) - if mol.type == "Complex": - mol.smiles = [] - mol.smiles_with_H = [] - for lig in mol.ligandlist: - lig.smiles_with_H = lig.smiles - lig.smiles, lig.rdkit_mol = correct_smiles_ligand(lig) - mol.smiles.append(lig.smiles) - mol.smiles_with_H.append(lig.smiles_with_H) - - return moleclist - - -####################################################### -def check_carbenes(atom: object, ligand: object, molecule: object, debug: int=0) -> Tuple[bool, str, int, int]: - # Function that determines whether a given connected "atom" of a "ligand" of a "molecule" is a carbene - # This function is in progress. Ideally, should be able to identify Fischer, Schrock and N-Heterocyclic Carbenes - # The former two cases probably require rules that involve other ligands in the molecule, hence why the "molecule" is provided - #:return iscarbene: Boolean variable. True/False - #:return element: Type of element that will be later added in the "add_atom" function below - #:return addedlist: List of integers which track in which atom of the ligand we're adding "elements" - #:return metal_electrons: List of integers, similar to addedlist, which track in which atom of the ligand we're counting on metal_electrons. - - # about Metal electrons: This variable is a way to contemplate cases in which the metal atom is actually contributing with electrons to the metal-ligand bond. - # about Metal electrons: In reality, I'm not sure about how to use it correctly, and now is used without much chemical sense - - iscarbene = False - element = "H" - addedlist = 0 - metal_electrons = 0 - - # Initial attempt with Carbenes, but they are much more complex - # Looks for Neighbouring N atoms - list_of_coord_atoms = [] - for i in atom.adjacency: - list_of_coord_atoms.append(ligand.labels[i]) - numN = list_of_coord_atoms.count("N") - - if numN == 2: # it is an N-Heterocyclic carbenes - iscarbene = True - element = "H" - addedlist = 1 - - return iscarbene, element, addedlist, metal_electrons - -############ -def correct_smiles_ligand(lig: object): - ## Receives a ligand class object and constructs the smiles and the rdkit_mol object from scratch, using atoms and bond information - - Chem.rdmolops.SanitizeFlags.SANITIZE_NONE - #### Creates an empty editable molecule - rwlig = Chem.RWMol() - - # Adds atoms with their formal charge - for jdx, a in enumerate(lig.atoms): - rdkit_atom = Chem.Atom(lig.atnums[jdx]) - rdkit_atom.SetFormalCharge(int(a.charge)) - rdkit_atom.SetNoImplicit(True) - rwlig.AddAtom(rdkit_atom) - - # Sets bond information and hybridization - for jdx, a in enumerate(lig.atoms): - nbonds = 0 - for bond in a.bond: - nbonds += 1 - isaromatic = False - if bond[2] == 1.0: btype = Chem.BondType.SINGLE - elif bond[2] == 2.0: btype = Chem.BondType.DOUBLE - elif bond[2] == 3.0: btype = Chem.BondType.TRIPLE - elif bond[2] == 1.5: - btype = Chem.BondType.AROMATIC - rdkit_atom.SetIsAromatic(True) - if bond[0] == jdx and bond[1] > jdx: rwlig.AddBond(bond[0], bond[1], btype) - - if nbonds == 1: hyb = Chem.HybridizationType.S - elif nbonds == 2: hyb = Chem.HybridizationType.SP - elif nbonds == 3: hyb = Chem.HybridizationType.SP2 - elif nbonds == 4: hyb = Chem.HybridizationType.SP3 - else: hyb = Chem.HybridizationType.UNSPECIFIED - rdkit_atom.SetHybridization(hyb) - - # Creates Molecule - obj = rwlig.GetMol() - smiles = Chem.MolToSmiles(obj) - - Chem.SanitizeMol(obj) - Chem.DetectBondStereochemistry(obj, -1) - Chem.AssignStereochemistry(obj, flagPossibleStereoCenters=True, force=True) - Chem.AssignAtomChiralTagsFromStructure(obj, -1) - - return smiles, obj - -################### -### NEW OBJECTS ### -################### - -class protonation(object): - def __init__(self, labels, coordinates, factor, added_atoms, addedlist, block, metal_electrons, elemlist, tmpsmiles=" ", os=int(0), typ="Local"): - self.labels = labels - self.coordinates = coordinates - self.added_atoms = added_atoms - self.addedlist = addedlist - self.block = block - self.metal_electrons = metal_electrons - self.elemlist = elemlist - self.typ = typ - self.factor = factor - self.os = os - self.tmpsmiles = tmpsmiles - - self.radii = getradii(labels) - status, tmpconmat, tmpconnec, tmpmconmat, tmpmconnec = getconec(self.labels, self.coordinates, self.factor, self.radii) - - #if status == 0: - # print("PROTONATION WITH STATUS=0, meaning probable steric clashes") - # for idx in range(len(self.labels)): - # print("%s %.6f %.6f %.6f" % (self.labels[idx], self.coordinates[idx][0], self.coordinates[idx][1], self.coordinates[idx][2])) - - self.status = status # 1 when correct, 0 when steric clashes - self.conmat = tmpconmat - self.connec = tmpconnec -####################################################### - -class charge_state(object): - def __init__(self, status, uncorr_total_charge, uncorr_atom_charges, rdkit_mol, smiles, charge_tried, allow): - self.status = status - self.uncorr_total_charge = uncorr_total_charge - self.uncorr_atom_charges = uncorr_atom_charges - self.rdkit_mol = rdkit_mol - self.smiles = smiles - self.charge_tried = charge_tried - self.allow = allow - - self.uncorr_abstotal, self.uncorr_abs_atcharge, self.uncorr_zwitt = eval_chargelist(uncorr_atom_charges) - - if uncorr_total_charge == charge_tried: - self.coincide = True - else: - self.coincide = False - - def correction(self, addedlist, metal_electrons, elemlist): - self.addedlist = addedlist - self.metal_electrons = metal_electrons - self.elemlist = elemlist - self.corr_total_charge = int(0) - self.corr_atom_charges = [] - # Corrects the Charge of atoms with addedH - count = 0 - if len(addedlist) > 0: - for idx, add in enumerate(addedlist): # Iterates over the original number of ligand atoms, thus without the added H - if add != 0: - count += 1 - corrected = self.uncorr_atom_charges[idx] - addedlist[idx] + metal_electrons[idx] - self.uncorr_atom_charges[len(addedlist)-1+count] - self.corr_atom_charges.append(corrected) - # last term corrects for cases in which a charge has been assigned to the added atom - else: - self.corr_atom_charges.append(self.uncorr_atom_charges[idx]) - self.corr_total_charge = int(np.sum(self.corr_atom_charges)) - else: - self.corr_total_charge = self.uncorr_total_charge - self.corr_atom_charges = self.uncorr_atom_charges.copy() - - self.corr_abstotal, self.corr_abs_atcharge, self.corr_zwitt = eval_chargelist(self.corr_atom_charges) - -# fmt: on diff --git a/cell2mol/old_classes.py b/cell2mol/old_classes.py deleted file mode 100755 index 93a2a70e..00000000 --- a/cell2mol/old_classes.py +++ /dev/null @@ -1,489 +0,0 @@ -#!/usr/bin/env python - -import numpy as np -from scipy import sparse -from scipy.sparse import csr_matrix -from scipy.sparse.csgraph import reverse_cuthill_mckee -from typing import Tuple - -from cell2mol.elementdata import ElementData -elemdatabase = ElementData() - -def find_groups_within_ligand(ligand: object) -> list: - - debug = 0 - if debug >= 1: - print(f"DOING LIGAND: {ligand.labels}") - if debug >= 1: - print(f"FIND GROUPS received conmat shape: {ligand.conmat.shape}") - if debug >= 2: - print(f"FIND GROUPS received conmat: {ligand.conmat}") - - connected_atoms = [] - unconnected_atoms = [] - cutmolec = [] - for idx, a in enumerate(ligand.atoms): - if a.mconnec >= 1: - connected_atoms.append(idx) - cutmolec.append([a.label, idx]) - elif a.mconnec == 0: - unconnected_atoms.append(idx) - - rowless = np.delete(ligand.conmat, unconnected_atoms, 0) - columnless = np.delete(rowless, unconnected_atoms, axis=1) - - if debug >= 1: - print(f"FIND GROUPS: connected are: {connected_atoms}") - if debug >= 1: - print(f"FIND GROUPS: unconnected are: {unconnected_atoms}") - - # Regenerates the truncated lig.connec - connec = [] - for idx, c in enumerate(ligand.connec): - if idx in connected_atoms: - connec.append(ligand.connec[idx]) - - # Does the - degree = np.diag(connec) - lap = columnless - degree - - # Copied from split_complex - graph = csr_matrix(lap) - perm = reverse_cuthill_mckee(graph) - gp1 = graph[perm, :] - gp2 = gp1[:, perm] - dense = gp2.toarray() - - startlist, endlist = getblocks(dense) - ngroups = len(startlist) - - atomlist = np.zeros((len(dense))) - for b in range(0, ngroups): - for i in range(0, len(dense)): - if (i >= startlist[b]) and (i <= endlist[b]): - atomlist[i] = b + 1 - invperm = inv(perm) - atomlistperm = [int(atomlist[i]) for i in invperm] - - if debug >= 1: - print(f"FIND GROUPS: the {ngroups} groups start at: {startlist}") - - groups = [] - for b in range(0, ngroups): - atlist = [] - for i in range(0, len(atomlistperm)): - if atomlistperm[i] == b + 1: - atlist.append(cutmolec[i][1]) - groups.append(atlist) - - if debug >= 1: - print(f"FIND GROUPS finds {ngroups} as {groups}") - - return groups - -############################ CLASSES ######################### - -############### -### ATOM ###### -############### -class old_atom(object): - def __init__(self, index: int, label: str, coord: list, radii: float) -> None: - self.version = "V1.0" - self.index = index - self.label = label - self.coord = coord - self.frac = [] - self.atnum = elemdatabase.elementnr[label] - self.val = elemdatabase.valenceelectrons[label] # currently not used - self.weight = elemdatabase.elementweight[label] # currently not used - self.block = elemdatabase.elementblock[label] - self.radii = radii - - # Adjacency part is simultaneously to creating the ligand or molecule object - ### Changed in V14 - def adjacencies(self, conmat: np.ndarray, mconmat: np.ndarray, type: str="Molecule") -> None: - self.adjacency = [] - self.metal_adjacency = [] - - self.connec = np.sum(conmat) - if conmat.shape: - for idx, c in enumerate(conmat): - if c >= 1: - self.adjacency.append(idx) - else: - self.adjacency.append(conmat) - - if type == "Molecule": - self.mconnec = np.sum(mconmat) - if mconmat.shape: - for idx, c in enumerate(mconmat): - if c >= 1: - self.metal_adjacency.append(idx) - else: - self.adjacency.append(conmat) - - elif type == "Ligand" or type == "Metal": - self.mconnec = mconmat # this has to be improved, now it only receives a number, should receive a vector as above for "molecule" - - # Bonds part is created after the formal charge for each molecule/ligand/metal is decided - def bonds(self, start: list, end: list, order: list) -> None: - self.bond = [] - self.bond_start_idx = [] - self.bond_end_idx = [] - self.bond_order = [] - - for a in start: - self.bond_start_idx.append(a) - for b in end: - self.bond_end_idx.append(b) - for c in order: - self.bond_order.append(c) - - for group in zip(start, end, order): - self.bond.append(group) - - self.nbonds = len(self.bond) - self.totbondorder = np.sum(self.bond_order) - - def atom_charge(self, charge: int) -> None: - self.charge = charge - -############### -### MOLECULE ## -############### -class old_molecule(object): - def __init__(self, name: str, atlist: list, labels: list, coord: list, radii:list) -> None: - self.version = "V1.0" - self.refcode = "" - self.name = name - self.atlist = atlist - self.labels = labels - self.coord = coord - self.radii = radii - self.formula = labels2formula(labels) - self.occurrence = 0 # How many times the molecule appears in a unit cell - - self.natoms = len(atlist) - self.elemcountvec = getelementcount(labels) - self.Hvcountvec = getHvcount(labels) - - self.frac = [] - self.centroid = [] - self.tmatrix = [] - - # Creates Atoms - self.atnums = [] - self.atoms = [] - for idx, l in enumerate(self.labels): - newatom = atom(idx, l, self.coord[idx], self.radii[idx]) - self.atnums.append(newatom.atnum) - self.atoms.append(newatom) - - self.type = "Other" - self.eleccount = 0 - self.numH = 0 - for a in self.atoms: - if a.atnum == 1: - self.numH += 1 - self.eleccount += a.atnum - if (a.block == "d") or (a.block == "f"): - self.type = "Complex" - - if self.type == "Complex": - self.ligandlist = [] - self.metalist = [] - self.hapticity = False # V13 - self.hapttype = [] # V13 - - # Lists of potentially good variables for this molecule - self.poscharge = [] - self.posatcharge = [] - self.posobjlist = [] - self.posspin = [] - self.possmiles = [] - - # Stores the covalentradii factor and metal factor that were used to generate the molecule - def information(self, factor: float, metal_factor: float) -> None: - self.factor = factor - self.metal_factor = metal_factor - - # Actual variables for the molecule in the crystal where it comes from: - def charge(self, atcharge: np.ndarray, totcharge: int, rdkit_mol: list, smiles: list) -> None: - self.atcharge = atcharge - self.totcharge = totcharge - self.smiles_with_H = " " # If "Complex", it will be a list of ligand smiles - self.smiles = smiles # If "Complex", it will be a list of ligand smiles - self.rdkit_mol = rdkit_mol # If "Complex", it is an empty list - - if self.type != "Complex": - for idx, a in enumerate(self.atoms): - a.atom_charge(self.atcharge[idx]) - - # Spin State Variables - def magnetism(self, spin: int) -> None: - self.spin = spin - - def ml_prediction(self, spin_rf: int) -> None: - self.spin_rf = spin_rf - - # Connectivity = Adjacency Matrix. Potentially expandable to Include Bond Types - def adjacencies(self, conmat: np.ndarray, mconmat: np.ndarray) -> None: - self.conmat = conmat - self.mconmat = mconmat - self.connec = np.zeros((self.natoms)) - self.mconnec = np.zeros((self.natoms)) - for i in range(0, self.natoms): - self.connec[i] = np.sum(self.conmat[i, :]) - self.mconnec[i] = np.sum(self.mconmat[i, :]) - - self.totconnec = int(np.sum(self.connec) / 2) - self.totmconnec = int(np.sum(self.mconnec) / 2) - self.adjtypes = get_adjacency_types(self.labels, self.conmat) # V14 - # self.nbonds = int(np.sum(self.bonds)/2) - - for idx, a in enumerate(self.atoms): - # print("adjacencies sent with", np.array(self.conmat[idx].astype(int)), np.array(self.mconmat[idx].astype(int))) - a.adjacencies(np.array(self.conmat[idx].astype(int)),np.array(self.mconmat[idx].astype(int)),type="Molecule") - - # def repr_CM(self, ): - # self.CM = coulomb_matrix(self.atnums, self.coord) - # NOTE: don't forget to copy to ligand object when ready - - -############### -### LIGAND #### -############### -class old_ligand(object): - def __init__(self, name: str, atlist: list, labels: list, coord: list, radii: list) -> None: - self.version = "V1.0" - self.refcode = "" - self.name = name # creates as ligand index, later modified - self.atlist = atlist # atom index list. Numbers refer to the original molecule from where the subroutine is launched - self.labels = labels # elements - self.coord = coord # coordinates - self.radii = radii - self.formula = labels2formula(labels) - self.occurrence = 0 # How many times the ligand appears in a molecule - - self.natoms = len(atlist) # number of atoms - self.type = "Ligand" - self.elemcountvec = getelementcount(labels) - self.Hvcountvec = getHvcount(labels) - - # Lists of potentially good variables for this molecule - self.poscharge = [] - self.posatcharge = [] - self.posobjlist = [] - self.posspin = [] - self.possmiles = [] - - # Stores information about the metal to which it is attached, about groups of connected atoms, and hapticity - self.metalatoms = [] - self.grouplist = [] # V14, this info is generated in get_hapticity - self.hapticity = False # V13 - self.hapttype = [] # V13 - # self.haptgroups = [] #V14, replaced by grouplist - - self.atnums = [] - self.atoms = [] - for idx, l in enumerate(self.labels): - newatom = atom(idx, l, self.coord[idx], self.radii[idx]) - self.atnums.append(newatom.atnum) - self.atoms.append(newatom) - - # Creates atoms and defines charge - self.eleccount = 0 - self.numH = 0 - for a in self.atoms: - if a.atnum == 1: - self.numH += 1 - self.eleccount += a.atnum - - # Stores the covalentradii factor and metal factor that were used to generate the molecule - def information(self, factor: float, metal_factor: float) -> None: - self.factor = factor - self.metal_factor = metal_factor - - def charge(self, atcharge: list, totcharge: int, rdkit_mol: object, smiles: str) -> None: - self.atcharge = atcharge - self.totcharge = totcharge - self.smiles_with_H = " " # Smiles of the ligand that includes any added H atom, created at "build_bonds" function - self.smiles = smiles # Now Empty, created later as a smiles without any added H atom - self.rdkit_mol = rdkit_mol # Rdkit mol object - - for idx, a in enumerate(self.atoms): - a.atom_charge(int(self.atcharge[idx])) - - # Spin State Variables - def magnetism(self, spin: int) -> None: - self.spin = spin - - def adjacencies(self, conmat: np.ndarray, mconnec: np.ndarray) -> None: - self.conmat = conmat - self.connec = np.zeros((self.natoms)) - for i in range(0, self.natoms): - self.connec[i] = np.sum(self.conmat[i, :]) - - # For ligand, mconmat is all zero np.ndarray --> not used, Can I remove? - # so metal_adjacency in Class atom is also empty because mconmat doesn't contain metal-ligand connectivity - self.mconmat = np.zeros((self.natoms, self.natoms)).astype(int) - - self.mconnec = mconnec - self.totconnec = int(np.sum(self.connec)) - self.totmconnec = int(np.sum(self.mconnec)) - self.adjtypes = get_adjacency_types(self.labels, self.conmat) # V14 - - for idx, a in enumerate(self.atoms): - a.adjacencies(np.array(self.conmat[idx].astype(int)), int(mconnec[idx]), type="Ligand") - -############### -class old_group(object): - def __init__(self, atlist: list, hapticity: bool, hapttype: list) -> None: - self.version = "V1.0" - self.atlist = atlist # atom index list. Numbers refer to the original molecule from where the subroutine is launched - self.hapticity = hapticity - self.hapttype = hapttype - - -############### -#### METAL #### -############### -class old_metal(object): - def __init__(self, name: int, atlist: int, label: str, coord: list, radii: float) -> None: - self.version = "V1.0" - self.refcode = "" - self.name = name # creates as metal index, later modified - self.atlist = atlist # atom index list. Numbers refer to the original molecule from where the subroutine is launched - self.label = label - self.coord = coord - self.radii = radii - self.natom = int(1) # number of atoms - self.type = "Metal" - self.poscharge = [] - self.coord_sphere = [] - self.coord_sphere_ID = [] - self.coordinating_atoms = [] - self.coordinating_atoms_sites = [] - self.occurrence = 0 # How many times the metal appears in a unit cell - self.coordinating_atlist = [] - self.group_list = [] - self.group_atoms_list = [] - - self.atom = atom(name, label, self.coord, self.radii) - - # Stores the covalentradii factor and metal factor that were used to generate the molecule - def information(self, factor: float, metal_factor: float) -> None: - self.factor = factor - self.metal_factor = metal_factor - - def charge(self, metal_charge: int) -> None: - self.totcharge = metal_charge - - # def magnetism(self, spin): - # self.spin = spin - - def adjacencies(self, mconnec: np.ndarray) -> None: - self.mconnec = mconnec # adjacencies matrix with only metal bonds - self.totmconnec = int(np.sum(mconnec)) - self.atom.adjacencies(np.array(int(0)), int(mconnec), type="Metal") - - def coordination (self, coordination_number: int, hapticity: bool, hapttype: list, posgeom_dev: dict) -> None: - self.hapticity = hapticity - self.hapttype = hapttype - self.coordination_number=coordination_number - self.posgeom_dev=posgeom_dev - if len(posgeom_dev) == 0: - self.geometry = "Undefined" - self.deviation = "Undefined" - else: - self.geometry=min(posgeom_dev, key=posgeom_dev.get) - self.deviation=min(posgeom_dev.values()) - - def relative_radius(self, rel: float, rel_g: float, rel_c: float) -> None: - self.rel = rel - self.rel_g = rel_g - self.rel_c = rel_c - - -############## -#### CELL #### -############## -class old_cell(object): - def __init__(self, refcode: str, labels: list, pos: list, cellvec: list, cellparam: list, warning_list: list) -> None: - - self.version = "V1.0" - self.refcode = refcode - - self.cellvec = cellvec - self.cellparam = cellparam - - self.labels = labels - self.atom_coord = pos # Atom cartesian coordinates from info file - - self.natoms = len(labels) - self.coord = [] - - self.speclist = [] - self.refmoleclist = [] - self.moleclist = [] - self.warning_list = warning_list - self.warning_after_reconstruction = [] - - self.charge_distribution_list = [] - - def arrange_cell_coord(self): - ## Updates the cell coordinates preserving the original atom ordering - ## Do do so, it uses the variable atlist stored in each molecule - self.coord = np.zeros((self.natoms,3)) - for mol in self.moleclist: - for z in zip(mol.atlist, mol.coord): - for i in range(0,3): - self.coord[z[0]][i] = z[1][i] - self.coord = np.ndarray.tolist(self.coord) - - - def data_for_postproc(self, molecules, indices, options): - self.pp_molecules = molecules - self.pp_indices = indices - self.pp_options = options - - def print_charge_assignment(self): - print("=====================================Charges for all species in the unit cell=====================================") - print("[Ref.code] : {}".format(self.refcode)) - for unit in self.moleclist: - if unit.type == "Complex": - print("\n{}, totcharge {}, spin multiplicity {}".format(unit.type, unit.totcharge, unit.spin)) - for metal in unit.metalist: - print("\t Metal: {} charge {}".format(metal.label, metal.totcharge)) - for ligand in unit.ligandlist: - print("\t Ligand charge {}, {}".format(ligand.totcharge, ligand.smiles)) - elif unit.type == "Other": - print("\n{} totcharge {}, {}".format(unit.type, unit.totcharge, unit.smiles)) - print("\n") - - def print_Warning(self): - reason_of_Warning = [ - "Warning 1! Errors received from getting reference molecules (disorder or strange coordination)", - "Warning 2! Missing H atoms from C in reference molecules", - "Warning 3! Missing H atoms from coordinated water in reference molecules", - "Warning 4! Steric clashes while blocking molecules", - "Warning 5! Errors in cell reconstruction", - "Warning 6! Empty list of possible charges received for molecule or ligand", - "Warning 7! More than one valid possible charge distribution found", - "Warning 8! No valid possible charge distribution found", - "Warning 9! Error while preparing molecules.", - ] - - # printing original list - print("The Warning type list is : " + str(self.warning_list)) - - res = [i for i, val in enumerate(self.warning_list) if val] - # printing result - # print ("The list indices having True values are : " + str(res)) - - if len(res) == 0: - print("No Warnings!") - else: - for i in res: - print(reason_of_Warning[i]) -########## END OF CLASSES ########### diff --git a/cell2mol/readwrite.py b/cell2mol/read_write.py similarity index 100% rename from cell2mol/readwrite.py rename to cell2mol/read_write.py diff --git a/cell2mol/sergi_classes.py b/cell2mol/sergi_classes.py deleted file mode 100644 index 5cab1605..00000000 --- a/cell2mol/sergi_classes.py +++ /dev/null @@ -1,772 +0,0 @@ -import numpy as np -from Scope.Adapted_from_cell2mol import * -from Scope.Other import get_metal_idxs -from Scope.Unit_cell_tools import * -#from Scope.Reconstruct import * - -################################ -#### BASIS FOR CELL2MOL 2 #### -################################ -class specie(object): - def __init__(self, labels: list, coord: list, indices: list=None, radii: list=None, parent: object=None) -> None: - - # Sanity Checks - assert len(labels) == len(coord) - if indices is not None: assert len(labels) == len(indices) - if radii is not None: assert len(labels) == len(radii) - - # Optional Information - if radii is not None: self.radii = radii - else: self.radii = get_radii(labels) - if parent is not None: self.parent = parent -# if parent is not None: self.occurence = parent.get_occurrence(self) - - self.type = "specie" - self.version = "0.1" - self.labels = labels - self.coord = coord - self.formula = labels2formula(labels) - self.eleccount = labels2electrons(labels) - self.natoms = len(labels) - self.iscomplex = any((elemdatabase.elementblock[l] == "d") or (elemdatabase.elementblock[l] == "f") for l in self.labels) - - if indices is not None: self.indices = indices - else: self.indices = [*range(0,self.natoms,1)] - - self.cov_factor = 1.3 - self.metal_factor = 1.0 - - def get_centroid(self): - self.centroid = compute_centroid(self.coord) - if hasattr(self,"frac_coord"): self.frac_centroid = compute_centroid(self.frac_coord) - return self.centroid - - def set_fractional_coord(self, frac_coord: list) -> None: - self.frac_coord = frac_coord - - def get_fractional_coord(self, cell_vector=None) -> None: - from Scope.Reconstruct import cart2frac - if cell_vector is None: - if hasattr(self,"parent"): - if hasattr(self.parent,"cellvec"): cell_vector = self.parent.cellvec.copy() - else: print("CLASS_SPECIE: get_fractional coordinates. Missing cell vector. Please provide it"); return None - else: - self.frac_coord = cart2frac(self.coord, cell_vector) - return self.frac_coord - - def get_atomic_numbers(self): - if not hasattr(self,"atoms"): self.set_atoms() - self.atnums = [] - for at in self.atoms: - self.atnums.append(at.atnum) - return self.atnums - - def set_atoms(self, atomlist: object=None): - if atomlist is None: - self.atoms = [] - for idx, l in enumerate(self.labels): - newatom = atom(l, self.coord[idx], parent=self, index=idx, radii=self.radii[idx]) - self.atoms.append(newatom) - else: - self.atoms = atomlist - - def set_element_count(self, heavy_only: bool=False): - self.element_count = get_element_count(self.labels, heavy_only=heavy_only) - return self.element_count - - def set_adj_types(self): - if not hasattr(self,"adjmat"): self.get_adjmatrix() - self.adj_types = get_adjacency_types(self.labels, self.adjmat) - return self.adj_types - - def set_adjacency_parameters(self, cov_factor: float, metal_factor: float) -> None: - # Stores the covalentradii factor and metal factor that were used to generate the molecule - self.cov_factor = cov_factor - self.metal_factor = metal_factor - - def set_charges(self, totcharge: int=None, atomic_charges: list=None) -> None: - ## Sets total charge - if totcharge is not None: self.totcharge = totcharge - elif totcharge is None and atomic_charges is not None: self.totcharge = np.sum(atomic_charges) - elif totcharge is None and atomic_charges is None: self.totcharge = "Unknown" - ## Sets atomic charges - if atomic_charges is not None: - self.atomic_charges = atomic_charges - if not hasattr(self,"atoms"): self.set_atoms() - for idx, a in enumerate(self.atoms): - a.set_charge(self.atomic_charges[idx]) - - def get_adjmatrix(self): - isgood, adjmat, adjnum = get_adjmatrix(self.labels, self.coord, self.cov_factor, self.radii) - if isgood: - self.adjmat = adjmat - self.adjnum = adjnum - else: - self.adjmat = None - self.adjnum = None - return self.adjmat, self.adjnum - - def get_occurrence(self, substructure: object) -> int: - occurrence = 0 - ## Ligands in Complexes or Groups in Ligands - done = False - if hasattr(substructure,"subtype") and hasattr(self,"subtype"): - if substructure.subtype == 'ligand' and self.subtype == 'molecule': - if not hasattr(self,"ligands"): self.split_complex() - if self.ligands is not None: - for l in self.ligands: - issame = compare_species(substructure, l, debug=1) - if issame: occurrence += 1 - done = True - elif substructure.subtype == 'group' and self.subtype == 'ligand': - if not hasattr(self,"ligands"): self.split_complex() - if self.ligands is not None: - for l in self.ligands: - if not hasattr(l,"groups"): self.split_ligand() - for g in l.groups: - issame = compare_species(substructure, g, debug=1) - if issame: occurrence += 1 - done = True - ## Atoms in Species - if not done: - if substructure.type == 'atom' and self.type == 'specie': - if not hasattr(self,"atoms"): self.set_atoms() - for at in self.atoms: - issame = compare_atoms(substructure, at) - if issame: occurrence += 1 - return occurrence - - def magnetism(self, spin: int) -> None: - self.spin = spin - - def print_xyz(self): - print(self.natoms) - print("") - for idx, l in enumerate(self.labels): - print("%s %.6f %.6f %.6f" % (l, self.coord[idx][0], self.coord[idx][1], self.coord[idx][2])) - - ## To be implemented - def __add__(self, other): - if not isinstance(other, type(self)): return self - return self - - def __repr__(self): - to_print = f'---------------------------------------------------\n' - to_print += ' >>> SPECIE >>> \n' - to_print += f'---------------------------------------------------\n' - to_print += f' Version = {self.version}\n' - to_print += f' Type = {self.type}\n' - if hasattr(self,'subtype'): to_print += f' Sub-Type = {self.subtype}\n' - to_print += f' Number of Atoms = {self.natoms}\n' - to_print += f' Formula = {self.formula}\n' - if hasattr(self,"adjmat"): to_print += f' Has Adjacency Matrix = YES\n' - else: to_print += f' Has Adjacency Matrix = NO \n' - if hasattr(self,"parent"): - if self.parent is not None: to_print += f' Has Parent = YES\n' - else: to_print += f' Has Parent = NO \n' - if hasattr(self,"occurrence"): to_print += f' Occurrence in Parent = {self.occurrence}\n' - if hasattr(self,"totcharge"): to_print += f' Total Charge = {self.totcharge}\n' - if hasattr(self,"spin"): to_print += f' Spin = {self.spin}\n' - if hasattr(self,"smiles"): to_print += f' SMILES = {self.smiles}\n' - to_print += '---------------------------------------------------\n' - return to_print - -############### -### MOLECULE ## -############### -class molecule(specie): - def __init__(self, labels: list, coord: list, indices: list=None, radii: list=None, parent: object=None) -> None: - self.subtype = "molecule" - specie.__init__(self, labels, coord, indices, radii, parent) - -############ - def split_complex(self, debug: int=0): - if not self.iscomplex: - self.ligands = None - self.metals = None - #print("MOLECULE.SPLIT_COMPLEX: This molecule is not a transition metal complex"); - - else: - self.ligands = [] - self.metals = [] - # Identify Metals and the rest - metal_idx = get_metal_idxs(self.labels, debug=debug) - rest_idx = list(idx for idx in range(0,len(self.labels)) if idx not in metal_idx) - if debug > 0: - print(f"SPLIT COMPLEX: found metals in indices {metal_idx}") - print(f"SPLIT COMPLEX: rest of indices: {rest_idx}") - #rest_idx = list(idx for idx in self.indices if idx not in metal_idx) - # Split the "rest" to obtain the ligands - rest_labels = extract_from_list(rest_idx, self.labels, dimension=1) - rest_coords = extract_from_list(rest_idx, self.coord, dimension=1) - rest_indices = None - #rest_indices = extract_from_list(rest_idx, self.indices, dimension=1) - rest_radii = extract_from_list(rest_idx, self.radii, dimension=1) - if hasattr(self,"frac_coord"): rest_frac = extract_from_list(rest_idx, self.frac_coord, dimension=1) - if debug > 0: print(f"SPLIT COMPLEX: rest labels: {rest_labels}") - if debug > 0: print(f"SPLIT COMPLEX: splitting species with {len(rest_labels)} atoms in block") - if hasattr(self,"cov_factor"): blocklist = split_species(rest_labels, rest_coords, radii=rest_radii, indices=rest_indices, cov_factor=self.cov_factor, debug=debug) - else: blocklist = split_species(rest_labels, rest_coords, radii=rest_radii, indices=rest_indices, debug=debug) - ## Arranges Ligands - for b in blocklist: - if debug > 0: print(f"PREPARING BLOCK: {b}") - lig_labels = extract_from_list(b, rest_labels, dimension=1) - lig_coord = extract_from_list(b, rest_coords, dimension=1) - #lig_indices = extract_from_list(b, rest_indices, dimension=1) - lig_radii = extract_from_list(b, rest_radii, dimension=1) - newligand = ligand(lig_labels, lig_coord, indices=None, radii=lig_radii, parent=self) - #newligand = ligand(lig_labels, lig_coord, indices=lig_indices, radii=lig_radii, parent=self) - # If fractional coordinates are available... - if hasattr(self,"frac_coord"): - lig_frac_coords = extract_from_list(b, rest_frac_coord, dimension=1) - newligand.set_fractional_coord(lig_frac_coords) - self.ligands.append(newligand) - ## Arranges Metals - for m in metal_idx: - newmetal = metal(self.labels[m], self.coord[m], self.indices[m], self.radii[m], parent=self) - self.metals.append(newmetal) - return self.ligands, self.metals - - #def get_metal_adjmatrix(self): - # if not hasattr(self,"adjmat"): self.get_adjmatrix() - # if self.adjmat is None: return None, None - # madjmat = np.zeros((self.natoms,self.natoms)) - # madjnum = np.zeros((self.natoms)) - # metal_idx = get_connected_idx(self, debug=debug) - # for idx, row in enumerate(self.adjmat): - # for jdx, col in enumerate(row): - # if not idx in metal_idx: madjmat[idx,jdx] = int(0) - # else: madjmat[idx,jdx] = adjmat[idx,jdx] - # for i in range(0, len(adjmat)): - # madjnum[i] = np.sum(madjmat[i, :]) - # return self.madjmat, self.madjnum - -############ - -############### -### LIGAND #### -############### -class ligand(specie): - def __init__(self, labels: list, coord: list, indices: list=None, radii: list=None, parent: object=None) -> None: - self.subtype = "ligand" - specie.__init__(self, labels, coord, indices, radii, parent) - - def set_hapticity(self, hapttype): - self.hapticity = True - self.hapttype = hapttype - - def get_connected_idx(self, debug: int=0): - self.connected_idx = [] - if not hasattr(self.parent,"adjmat"): madjmat, madjnum = self.parent.get_metal_adjmatrix() - self.madjmat = extract_from_list(self.indices, madjmat, dimension=2) - self.madjnum = extract_from_list(self.indices, madjnum, dimension=1) - for idx, con in enumerate(self.madjnum): - if con > 0: self.connected_idx.append(idx) - return self.connected_idx - - def split_ligand(self, debug: int=0): - self.groups = [] - # Identify Connected and Unconnected atoms (to the metal) - if not hasattr(self,"connected_idx"): self.get_connected_idx() - conn_idx = self.connected_idx - rest_idx = list(idx for idx in self.indices if idx not in conn_idx) - - # Split the "ligand to obtain the groups - conn_labels = extract_from_list(conn_idx, self.labels, dimension=1) - conn_coords = extract_from_list(conn_idx, self.coord, dimension=1) - conn_indices = extract_from_list(conn_idx, self.indices, dimension=1) - - if hasattr(self,"cov_factor"): blocklist = split_species(conn_labels, conn_coords, indices=conn_indices, cov_factor=self.cov_factor) - else: blocklist = split_species(conn_labels, conn_coords, indices=conn_indices) - ## Arranges Groups - for b in blocklist: - gr_labels = extract_from_list(b, self.labels, dimension=1) - gr_coord = extract_from_list(b, self.coord, dimension=1) - gr_indices = extract_from_list(b, self.indices, dimension=1) - gr_radii = extract_from_list(b, self.radii, dimension=1) - newgroup = group(gr_labels, gr_coord, gr_indices, gr_radii, parent=self) - self.groups.append(newgroup) - - return self.groups - -############### -#### GROUP #### -############### -class group(specie): - def __init__(self, labels: list, coord: list, indices: list=None, radii: list=None, parent: object=None) -> None: - self.subtype = "group" - specie.__init__(self, labels, coord, indices, radii, parent) - -############### -### ATOM ###### -############### -class atom(object): - def __init__(self, label: str, coord: list, index: int=None, radii: float=None, parent: object=None, frac_coord: list=None) -> None: - self.type = "atom" - self.version = "0.1" - self.label = label - self.coord = coord - self.atnum = elemdatabase.elementnr[label] - self.block = elemdatabase.elementblock[label] - - if index is not None: self.index = index - if radii is None: self.radii = getradii(label) - else: self.radii = radii - if parent is not None: self.parent = parent - if parent is not None: self.occurence = parent.get_occurrence(self) - if frac_coord is not None: self.frac_coord = frac_coord - - def set_adjacency_parameters(self, cov_factor: float, metal_factor: float) -> None: - self.cov_factor = cov_factor - self.metal_factor = metal_factor - - def set_charge(self, charge: int) -> None: - self.charge = charge - - def information(self, cov_factor: float, metal_factor: float) -> None: - self.cov_factor = cov_factor - self.metal_factor = metal_factor - - def __repr__(self): - to_print = f'---------------------------------------------------\n' - to_print += ' >>> ATOM >>> \n' - to_print += f'---------------------------------------------------\n' - to_print += f' Version = {self.version}\n' - to_print += f' Type = {self.type}\n' - if hasattr(self,'subtype'): to_print += f' Sub-Type = {self.subtype}\n' - to_print += f' Label = {self.label}\n' - to_print += f' Atomic Number = {self.atnum}\n' - to_print += f' Index = {self.index}\n' - if hasattr(self,"occurrence"): to_print += f' Occurrence in Parent = {self.occurrence}\n' - if hasattr(self,"charge"): to_print += f' Atom Charge = {self.charge}\n' - to_print += '----------------------------------------------------\n' - return to_print - -############### -#### METAL #### -############### -class metal(atom): - def __init__(self, label: str, coord: list, index: int=None, radii: float=None, parent: object=None, frac_coord: list=None) -> None: - self.subtype = "metal" - atom.__init__(self, label, coord, index=index, radii=radii, parent=parent, frac_coord=frac_coord) - - def get_coord_sphere(self): - if not hasattr(self,"parent"): return None - if not hasattr(self.parent,"adjmat"): self.parent.get_adjmatrix() - adjmat = self.parent.adjmat.copy() - connec_atoms_labels = [] - for idx, at in enumerate(adjmat[self.index]): - if at >= 1: - connec_atoms_labels.append(str(self.parent.labels[idx])) - self.coord_sphere = labels2formula(connec_atoms_labels) - return self.coord_sphere - -############## -#### CELL #### -############## -class cell(object): - def __init__(self, refcode: str, labels: list, pos: list, cellvec: list, cellparam: list) -> None: - self.version = "0.1" - self.type = "cell" - self.refcode = refcode - self.labels = labels - self.coord = pos - self.pos = pos - self.cellvec = cellvec - self.cellparam = cellparam - self.natoms = len(labels) - - def set_fractional_coord(self, frac_coord: list) -> None: - self.frac_coord = frac_coord - - def set_moleclist(self, moleclist: list) -> None: - self.moleclist = moleclist - - def get_moleclist(self, blocklist=None): - if not hasattr(self,"labels") or not hasattr(self,"pos"): return None - if len(self.labels) == 0 or len(self.pos) == 0: return None - cov_factor = 1.3 - - if blocklist is None: blocklist = split_species(self.labels, self.pos, cov_factor=cov_factor) - self.moleclist = [] - for b in blocklist: - mol_labels = extract_from_list(b, self.labels, dimension=1) - mol_coords = extract_from_list(b, self.coord, dimension=1) - newmolec = molecule(mol_labels, mol_coords) - # If fractional coordinates are available... - if hasattr(self,"frac_coord"): - mol_frac_coords = extract_from_list(b, self.frac_coord, dimension=1) - newmolec.set_fractional_coord(mol_frac_coords) - # This must be below the frac_coord, so they are carried on to the ligands - if newmolec.iscomplex: - newmolec.split_complex() - self.moleclist.append(newmolec) - return self.moleclist - - def arrange_cell_coord(self): - ## Updates the cell coordinates preserving the original atom ordering - ## Do do so, it uses the variable atlist stored in each molecule - self.coord = np.zeros((self.natoms,3)) - for mol in self.moleclist: - for z in zip(mol.indices, mol.coord): - for i in range(0,3): - self.coord[z[0]][i] = z[1][i] - self.coord = np.ndarray.tolist(self.coord) - - def get_occurrence(self, substructure: object) -> int: - occurrence = 0 - ## Molecules in Cell - if hasattr(substructure,"subtype") and hasattr(self,"moleclist"): - if substructure.subtype == 'molecule': - for m in self.moleclist: - issame = compare_species(substructure, m) - if issame: occurrence += 1 - return occurrence - -# def reconstruct(self, debug: int=0): -# if not hasattr(self,"refmoleclist"): print("CLASS_CELL missing list of reference molecules"); return None -# from Scope.Other import HiddenPrints -# import itertools -# print("CLASS_CELL: reconstructing cell") -# with HiddenPrints(): -# if not hasattr(self,"moleclist"): self.get_moleclist() -# blocklist = self.moleclist.copy() # In principle, in moleclist now there are both fragments and molecules -# refmoleclist = self.refmoleclist.copy() -# cov_factor = refmoleclist[0].cov_factor -# metal_factor = refmoleclist[0].metal_factor -# ## Prepares Blocks -# for b in blocklist: -# if not hasattr(b,"frac_coord"): b.get_fractional_coord(cellvec) -# if not hasattr(b,"centroid"): b.get_centroid() -# if not hasattr(b,"element_count"): b.set_element_count() -# if not hasattr(b,"numH"): b.numH = b.set_element_count()[4] -# ## Prepares Reference Molecules -# for ref in refmoleclist: -# if not hasattr(ref,"element_count"): ref.set_element_count() -# if not hasattr(ref,"numH"): ref.numH = ref.set_element_count()[4] -# ## Classifies fragments -# moleclist, fraglist, Hlist = classify_fragments(blocklist, moleclist, refmoleclist, self.cellvec) -# ## Determines if Reconstruction is necessary -# if len(fraglist) > 0 or len(Hlist) > 0: isfragmented = True -# else: isfragmented = False -# -# if not isfragmented: return self.moleclist -# moleclist, finalmols, Warning = fragments_reconstruct(moleclist,fraglist,Hlist,refmoleclist,self.cellvec,cov_factor,metal_factor) -# moleclist.extend(finalmols) -# self.moleclist = moleclist -# self.set_geometry_from_moleclist() -# return self.moleclist - - def __repr__(self): - to_print = f'---------------------------------------------------\n' - to_print += ' >>> CELL >>> \n' - to_print += f'---------------------------------------------------\n' - to_print += f' Version = {self.version}\n' - to_print += f' Type = {self.type}\n' - to_print += f' Refcode = {self.refcode}\n' - to_print += f' Num Atoms = {self.natoms}\n' - to_print += f' Cell Parameters a:c = {self.cellparam[0:3]}\n' - to_print += f' Cell Parameters al:ga = {self.cellparam[3:6]}\n' - if hasattr(self,"moleclist"): - to_print += f' # Molecules: = {len(self.moleclist)}\n' - to_print += f' With Formulae: \n' - for idx, m in enumerate(self.moleclist): - to_print += f' {idx}: {m.formula} \n' - to_print += '---------------------------------------------------\n' - if hasattr(self,"refmoleclist"): - to_print += f' # of Ref Molecules: = {len(self.refmoleclist)}\n' - to_print += f' With Formulae: \n' - for idx, ref in enumerate(self.refmoleclist): - to_print += f' {idx}: {ref.formula} \n' - return to_print - - -##################### -### SPLIT SPECIES ### -##################### -#def split_species_from_object(obj: object, debug: int=0): -# -# if not hasattr(obj,"adjmat"): obj.get_adjmatrix() -# if obj.adjmat is None: return None -# -# degree = np.diag(self.adjnum) # creates a matrix with adjnum as diagonal values. Needed for the laplacian -# lap = obj.adjmat - degree # computes laplacian -# -# # creates block matrix -# graph = csr_matrix(lap) -# perm = reverse_cuthill_mckee(graph) -# gp1 = graph[perm, :] -# gp2 = gp1[:, perm] -# dense = gp2.toarray() -# -# # detects blocks in the block diagonal matrix called "dense" -# startlist, endlist = get_blocks(dense) -# -# nblocks = len(startlist) -# # keeps track of the atom movement within the matrix. Needed later -# atomlist = np.zeros((len(dense))) -# for b in range(0, nblocks): -# for i in range(0, len(dense)): -# if (i >= startlist[b]) and (i <= endlist[b]): -# atomlist[i] = b + 1 -# invperm = inv(perm) -# atomlistperm = [int(atomlist[i]) for i in invperm] -# -# # assigns atoms to molecules -# blocklist = [] -# for b in range(0, nblocks): -# atlist = [] # atom indices in the original ordering -# for i in range(0, len(atomlistperm)): -# if atomlistperm[i] == b + 1: -# atlist.append(indices[i]) -# blocklist.append(atlist) -# return blocklist - -###################### -#### IMPORT #### -###################### -def import_cell(old_cell: object, debug: int=0) -> object: - assert hasattr(old_cell,"labels") and (hasattr(old_cell,"coord") or hasattr(old_cell,"pos")) - assert hasattr(old_cell,"cellvec") - assert hasattr(old_cell,"refcode") - - - labels = old_cell.labels - refcode = old_cell.refcode - if hasattr(old_cell,"coord"): coord = old_cell.coord - elif hasattr(old_cell,"pos"): coord = old_cell.pos - - cellvec = old_cell.cellvec - if hasattr(old_cell,"cellparam"): cellparam = old_cell.cellparam - else: cellparam = cellvec_2_cellparam(old_cell.cellvec) - - if hasattr(old_cell,"frac_coord"): frac_coord = old_cell.frac_coord - else: frac_coord = cart2frac(coord, old_cell.cellvec) - - new_cell = cell(refcode, labels, coord, cellvec, cellparam) - new_cell.set_fractional_coord(frac_coord) - if debug > 0: print(f"IMPORT CELL: created new_cell {new_cell}") - - ## Moleclist - moleclist = [] - for mol in old_cell.moleclist: - if debug > 0: print(f"IMPORT CELL: importing molecule {mol.formula} with charge {mol.totcharge}") - new_mol = import_molecule(mol, parent=new_cell, debug=debug) - moleclist.append(new_mol) - new_cell.set_moleclist(moleclist) - - ## Refmoleclist - new_cell.refmoleclist = [] - if hasattr(old_cell,"refmoleclist"): - for rmol in old_cell.refmoleclist: - new_cell.refmoleclist.append(import_molecule(rmol)) - elif hasattr(new_cell,"moleclist"): - for mol in new_cell.moleclist: - found = False - for rmol in new_cell.refmoleclist: - issame = compare_species(rmol, mol) - if issame: found = True - if not found: new_cell.refmoleclist.append(mol) - - ## Temporary things that I'd like to remove from the import once sorted - if hasattr(old_cell,"warning_list"): new_cell.warning_list = old_cell.warning_list - - return new_cell - -################################ -def import_molecule(mol: object, parent: object=None, debug: int=0) -> object: - assert hasattr(mol,"labels") and (hasattr(mol,"coord") or hasattr(mol,"pos")) - labels = mol.labels - if hasattr(mol,"coord"): coord = mol.coord - elif hasattr(mol,"pos"): coord = mol.pos - - if hasattr(mol,"indices"): indices = mol.indices - elif hasattr(mol,"atlist"): indices = mol.atlist - else: indices = None - - if hasattr(mol,"radii"): radii = mol.radii - else: radii = None - - if parent is None: print(f"IMPORT MOLECULE {mol.formula}: parent is NONE") - - new_molec = molecule(labels, coord, indices, radii, parent) - if debug > 0: print(f"IMPORT MOLEC: created new_molec with {new_molec.formula}") - - ## Charges - if hasattr(mol,"totcharge") and hasattr(mol,"atcharge"): - if debug > 0: print(f"IMPORT MOLEC: old molecule has total charge and atomic charges") - new_molec.set_charges(mol.totcharge, mol.atcharge) - elif hasattr(mol,"totcharge") and not hasattr(mol,"atcharge"): - if debug > 0: print(f"IMPORT MOLEC: old molecule has total charge but no atomic charges") - new_molec.set_charges(mol.totcharge) - else: - if debug > 0: print(f"IMPORT MOLEC: old molecule has no total charge nor atomic charges") - - ## Smiles - if hasattr(mol,"Smiles"): new_molec.smiles = mol.Smiles - elif hasattr(mol,"smiles"): new_molec.smiles = mol.smiles - - ## Substructures - if not hasattr(mol,"ligandlist") or not hasattr(mol,"metalist"): new_molec.split_complex() - if hasattr(mol,"ligandlist"): - ligands = [] - for lig in mol.ligandlist: - if debug > 0: print(f"IMPORT MOLEC: old molecule has ligand {lig.formula}") - new_lig = import_ligand(lig, parent=new_molec, debug=debug) - ligands.append(new_lig) - new_molec.ligands = ligands - if hasattr(mol,"metalist"): - metals = [] - for met in mol.metalist: - if debug > 0: print(f"IMPORT MOLEC: old molecule has metal {met.label}") - new_atom = import_atom(met, parent=new_molec, debug=debug) - metals.append(new_atom) - new_molec.metals = metals - - ## Atoms - if not hasattr(mol,"atoms"): - if debug > 0: print(f"IMPORT MOLEC: old molecule has no atoms") - new_molec.set_atoms() - else: - atoms = [] - for at in mol.atoms: - if debug > 0: print(f"IMPORT MOLEC: old molecule has atom {at.label}") - new_atom = import_atom(at, parent=new_molec, debug=debug) - atoms.append(new_atom) - new_molec.set_atoms(atoms) - - return new_molec - -################################ -def import_ligand(lig: object, parent: object=None, debug: int=0) -> object: - assert hasattr(lig,"labels") and (hasattr(lig,"coord") or hasattr(lig,"pos")) - labels = lig.labels - if hasattr(lig,"coord"): coord = lig.coord - elif hasattr(lig,"pos"): coord = lig.pos - - if hasattr(lig,"indices"): indices = lig.indices - elif hasattr(lig,"atlist"): indices = lig.atlist - else: indices = None - - if hasattr(lig,"radii"): radii = lig.radii - else: radii = None - - if debug > 0 and parent is None: print("IMPORT LIGAND: parent is NONE") - - new_ligand = ligand(labels, coord, indices, radii, parent) - if debug > 0: print(f"IMPORT LIGAND: created new_ligand with {new_ligand.formula}") - - ## Charges - if hasattr(lig,"totcharge") and hasattr(lig,"atcharge"): - new_ligand.set_charges(lig.totcharge, lig.atcharge) - elif hasattr(lig,"totcharge") and not hasattr(lig,"atcharge"): - new_ligand.set_charges(lig.totcharge) - - ## Smiles - if hasattr(lig,"Smiles"): new_ligand.smiles = lig.Smiles - elif hasattr(lig,"smiles"): new_ligand.smiles = lig.smiles - - ## Rdkit Object - if hasattr(lig,"object"): new_ligand.rdkit_obj = lig.object - - ## Substructures - if not hasattr(lig,"grouplist"): new_ligand.split_ligand() - else: - groups = [] - for gr in lig.grouplist: - new_group = import_group(gr, parent=new_ligand, debug=debug) - groups.append(new_group) - new_ligand.groups = groups - - ## Atoms - if not hasattr(lig,"atoms"): new_ligand.set_atoms() - else: - atoms = [] - for at in lig.atoms: - new_atom = import_atom(at, parent=new_ligand, debug=debug) - atoms.append(new_atom) - new_ligand.set_atoms(atoms) - - return new_ligand - -################################ -def import_group(old_group: object, parent: object=None, debug: int=0) -> object: - assert hasattr(old_group,"atlist") or hasattr(old_group,"indices") - - if hasattr(old_group,"labels"): labels = old_group.labels - elif parent is not None: - if hasattr(parent,"labels"): labels = extract_from_list(old_group.atlist, parent.labels, dimension=1) - - if hasattr(old_group,"coord"): coord = old_group.coord - elif hasattr(old_group,"pos"): coord = old_group.pos - elif parent is not None: - if hasattr(parent,"coord"): coord = extract_from_list(old_group.atlist, parent.coord, dimension=1) - elif hasattr(parent,"coord"): coord = extract_from_list(old_group.atlist, parent.pos, dimension=1) - - if hasattr(old_group,"indices"): indices = old_group.indices - elif hasattr(old_group,"atlist"): indices = old_group.atlist - else: indices = None - - if hasattr(old_group,"radii"): radii = old_group.radii - else: radii = None - - if parent is None: print("IMPORT GROUP: parent is NONE") - - new_group = group(labels, coord, indices, radii, parent) - if debug > 0: print(f"IMPORT GROUP: created new_group with {new_group.formula}") - - ## Charges - if hasattr(old_group,"totcharge") and hasattr(old_group,"atcharge"): - new_group.set_charges(old_group.totcharge, old_group.atcharge) - elif hasattr(old_group,"totcharge") and not hasattr(old_group,"atcharge"): - new_group.set_charges(old_group.totcharge) - - ## Smiles - if hasattr(old_group,"Smiles"): new_group.smiles = old_group.Smiles - elif hasattr(old_group,"smiles"): new_group.smiles = old_group.smiles - - ## Atoms - if not hasattr(old_group,"atoms"): new_group.set_atoms() - else: - atoms = [] - for at in old_group.atoms: - new_atom = import_atom(at, parent=new_group, debug=debug) - atoms.append(new_atom) - new_group.set_atoms(atoms) - - return new_group - -################################ -def import_atom(old_atom: object, parent: object=None, debug: int=0) -> object: - assert hasattr(old_atom,"label") and (hasattr(old_atom,"coord") or hasattr(old_atom,"pos")) - label = old_atom.label - - if hasattr(old_atom,"coord"): coord = old_atom.coord - elif hasattr(old_atom,"pos"): coord = old_atom.pos - - if hasattr(old_atom,"index"): index = old_atom.index - elif hasattr(old_atom,"atlist"): index = old_atom.atlist - else: index = None - - if hasattr(old_atom,"radii"): radii = old_atom.radii - else: radii = None - - if hasattr(old_atom,"block"): block = old_atom.block - else: block = elemdatabase.elementblock[label] - - if parent is None: print("IMPORT ATOM: parent is NONE") - - if block == 'd' or block == 'f': - new_atom = metal(label, coord, index=index, radii=radii, parent=parent) - if debug > 0: print(f"IMPORT ATOM: created new_metal {new_atom.label}") - else: - new_atom = atom(label, coord, index=index, radii=radii, parent=parent) - if debug > 0: print(f"IMPORT ATOM: created new_atom {new_atom.label}") - - ## Charge. For some weird reason, charge is "charge" in metals, and "totcharge" in regular atoms - if hasattr(old_atom,"charge"): - if type(old_atom.charge) == (int): new_atom.set_charge(old_atom.charge) - elif hasattr(old_atom,"totcharge"): - if type(old_atom.totcharge) == int: new_atom.set_charge(old_atom.totcharge) - - return new_atom diff --git a/cell2mol/sergi_connectivity.py b/cell2mol/sergi_connectivity.py deleted file mode 100644 index b812d8e2..00000000 --- a/cell2mol/sergi_connectivity.py +++ /dev/null @@ -1,369 +0,0 @@ -import warnings -import numpy as np -from scipy import sparse -from scipy.sparse import csr_matrix -from scipy.sparse.csgraph import reverse_cuthill_mckee -from typing import Tuple -from Scope.Elementdata import ElementData -elemdatabase = ElementData() - -#from cell2mol.tmcharge_common import Cell, atom, molecule, ligand, metal - -################################ -def extract_from_list(entrylist: list, old_array: list, dimension: int=2, debug: int=0) -> list: - #if debug >= 0: print("EXTRACT_FROM_LIST. received:", len(entrylist), np.max(entrylist)+1, len(old_array)) - length = len(entrylist) - if dimension == 2: - new_array = np.empty((length, length), dtype=object) - for idx, row in enumerate(entrylist): - for jdx, col in enumerate(entrylist): - new_array[idx, jdx] = old_array[row][col] - elif dimension == 1: - new_array = np.empty((length), dtype=object) - for idx, val in enumerate(entrylist): - new_array[idx] = old_array[val] - return list(new_array) - -################################ -def printxyz(labels, pos): - print(len(labels)) - print("") - for idx, l in enumerate(labels): - print("%s %.6f %.6f %.6f" % (l, pos[idx][0], pos[idx][1], pos[idx][2])) - -################################ -def labels2formula(labels: list): - elems = elemdatabase.elementnr.keys() - formula=[] - for z in elems: - nz = labels.count(z) - if nz > 1: - formula.append(f"{z}{nz}-") - if nz == 1: - formula.append(f"{z}-") - formula = ''.join(formula)[:-1] - return formula - -################################ -def labels2ratio(labels): - elems = elemdatabase.elementnr.keys() - ratio=[] - for z in elems: - nz = labels.count(z) - if nz > 0: ratio.append(nz) - return ratio - -################################ -def labels2electrons(labels): - eleccount = 0 - for l in labels: - eleccount += elemdatabase.elementnr[l] - return eleccount - -################################ -def get_element_count(labels: list, heavy_only: bool=False) -> np.ndarray: - elems = list(elemdatabase.elementnr.keys()) - count = np.zeros((len(elems)),dtype=int) - for l in labels: - for jdx, elem in enumerate(elems): - if l == elem: count[jdx] += 1 - if (l == 'H' or l == 'D') and heavy_only: count = 0 - return count - -################################ -def get_adjacency_types(label: list, conmat: np.ndarray) -> np.ndarray: - elems = elemdatabase.elementnr.keys() - natoms = len(label) - bondtypes = np.zeros((len(elems), len(elems)),dtype=int) - found = np.zeros((natoms, natoms)) - - for i in range(0, natoms): - for j in range(i, natoms): - if i != j: - if (conmat[i, j] == 1) and (found[i, j] == 0): - for k, elem1 in enumerate(elems): - if label[i] == elem1: - for l, elem2 in enumerate(elems): - if label[j] == elem2: - bondtypes[k, l] += 1 - if elem1 != elem2: - bondtypes[l, k] += 1 - found[i, j] = 1 - found[j, i] = 1 - break - break - return bondtypes - -################################ -def get_radii(labels: list) -> np.ndarray: - radii = [] - for l in labels: - if l[-1].isdigit(): label = l[:-1] - else: label = l - radii.append(elemdatabase.CovalentRadius2[label]) - return np.array(radii) - -#################################### -def get_adjmatrix(labels: list, pos: list, cov_factor: float, radii="default") -> Tuple[int, list, list]: - isgood = True - clash_threshold = 0.3 - natoms = len(labels) - adjmat = np.zeros((natoms, natoms)) - adjnum = np.zeros((natoms)) - - # Sometimes argument radii np.ndarry, or list - with warnings.catch_warnings(): - warnings.simplefilter(action="ignore", category=FutureWarning) - if type(radii) == str: - if radii == "default": - radii = get_radii(labels) - - # Creates Adjacency Matrix - for i in range(0, natoms - 1): - for j in range(i, natoms): - if i != j: - a = np.array(pos[i]) - b = np.array(pos[j]) - dist = np.linalg.norm(a - b) - thres = (radii[i] + radii[j]) * cov_factor - if dist <= clash_threshold: - isgood = False # invalid molecule - print("Adjacency Matrix: Distance", dist, "smaller than clash for atoms", i, j) - elif dist <= thres: - adjmat[i, j] = 1 - adjmat[j, i] = 1 - - # Sums the adjacencies of each atom to obtain "adjnum" - for i in range(0, natoms): - adjnum[i] = np.sum(adjmat[i, :]) - - adjmat = adjmat.astype(int) - adjnum = adjnum.astype(int) - return isgood, adjmat, adjnum - -#################################### -def inv(perm: list) -> list: - inverse = [0] * len(perm) - for i, p in enumerate(perm): - inverse[p] = i - return inverse - -#################################### -def get_blocks(matrix: np.ndarray) -> Tuple[list, list]: - # retrieves the blocks from a diagonal block matrix - startlist = [] # List including the starting atom for all blocks - endlist = [] # List including the final atom for all blocks - start = 1 - pos = start - posold = 0 - blockcount = 0 - j = 1 - while j < len(matrix): - if matrix[pos - 1, j] != 0.0: pos = j + 1 - if j == len(matrix) - 1: - blockcount = blockcount + 1 - startlist.append(posold) - endlist.append(pos - 1) - posold = pos - pos = pos + 1 - j = pos - 1 - continue - j += 1 - - if (blockcount == 0) and (len(matrix) == 1): # if a 1x1 matrix is provided, it then finds 1 block - startlist.append(0) - endlist.append(0) - return startlist, endlist - -#################################### -#def get_molecules(labels: list, pos: list, factor: float=1.3, debug: int=0) -> Tuple[bool, list]: -# ## Function that identifies connected groups of atoms from their atomic coordinates and labels. -# -# # Gets the covalent radii -# radii = get_radii(labels) -# -# # Computes the adjacency matrix of what is received -# isgood, adjmat, adjnum = get_adjmatrix(labels, pos, factor, radii) -# -# # isgood indicates whether the adjacency matrix could be built normally, or errors were detected. Typically, those errors are steric clashes -# if isgood: -# degree = np.diag(adjnum) # creates a matrix with adjnum as diagonal values. Needed for the laplacian -# lap = adjmat - degree # computes laplacian -# -# # creates block matrix -# graph = csr_matrix(lap) -# perm = reverse_cuthill_mckee(graph) -# gp1 = graph[perm, :] -# gp2 = gp1[:, perm] -# dense = gp2.toarray() -# -# # detects blocks in the block diagonal matrix called "dense" -# startlist, endlist = get_blocks(dense) -# -# nmolec = len(startlist) -# # keeps track of the atom movement within the matrix. Needed later -# atomlist = np.zeros((len(dense))) -# for b in range(0, nmolec): -# for i in range(0, len(dense)): -# if (i >= startlist[b]) and (i <= endlist[b]): -# atomlist[i] = b + 1 -# invperm = inv(perm) -# atomlistperm = [int(atomlist[i]) for i in invperm] -# -# # assigns atoms to molecules -# moleclist = [] -# for b in range(0, nmolec): -# labelist = [] -# poslist = [] -# atlist = [] # atom indices in the original ordering -# for i in range(0, len(atomlistperm)): -# if atomlistperm[i] == b + 1: -# labelist.append(labels[i]) -# poslist.append(pos[i]) -# atlist.append(i) -# radiilist = get_radii(labelist) -# -# newmolec = molecule(str(b), atlist, labelist, poslist, radiilist) -# newmolec.information(1.3,1.0) -# -# moleclist.append(newmolec) -# #moleclist.append(list([atlist, labelist, poslist, radiilist])) -# return moleclist - -################################ -def compute_centroid(coord: list) -> list: - natoms = len(coord) - x = 0 - y = 0 - z = 0 - for idx in range(natoms): - x += coord[idx][0]; y += coord[idx][1]; z += coord[idx][2] - centroid = [float(x / natoms), float(y / natoms), float(z / natoms)] - return np.array(centroid) -######################### - -######################### -def count_species(labels: list, pos: list, radii: list=None, indices: list=None, cov_factor: float=1.3, debug: int=0) -> Tuple[bool, list]: - # Gets the covalent radii - if radii is None: radii = get_radii(labels) - if indices is None: indices = [*range(0,len(labels),1)] - - # Computes the adjacency matrix of what is received - # isgood indicates whether the adjacency matrix could be built normally, or errors were detected. Typically, those errors are steric clashes - isgood, adjmat, adjnum = get_adjmatrix(labels, pos, cov_factor, radii) - if not isgood: return int(0) - - degree = np.diag(adjnum) # creates a matrix with adjnum as diagonal values. Needed for the laplacian - lap = adjmat - degree # computes laplacian - - # creates block matrix - graph = csr_matrix(lap) - perm = reverse_cuthill_mckee(graph) - gp1 = graph[perm, :] - gp2 = gp1[:, perm] - dense = gp2.toarray() - - # detects blocks in the block diagonal matrix called "dense" - startlist, endlist = get_blocks(dense) - - nblocks = len(startlist) - return nblocks -######################### - -#################################### -def split_species(labels: list, pos: list, radii: list=None, indices: list=None, cov_factor: float=1.3, debug: int=0) -> Tuple[bool, list]: - ## Function that identifies connected groups of atoms from their atomic coordinates and labels. - - # Gets the covalent radii - if radii is None: radii = get_radii(labels) - if indices is None: indices = [*range(0,len(labels),1)] - - # Computes the adjacency matrix of what is received - # isgood indicates whether the adjacency matrix could be built normally, or errors were detected. Typically, those errors are steric clashes - isgood, adjmat, adjnum = get_adjmatrix(labels, pos, cov_factor, radii) - if not isgood: return None - - degree = np.diag(adjnum) # creates a matrix with adjnum as diagonal values. Needed for the laplacian - lap = adjmat - degree # computes laplacian - - # creates block matrix - graph = csr_matrix(lap) - perm = reverse_cuthill_mckee(graph) - gp1 = graph[perm, :] - gp2 = gp1[:, perm] - dense = gp2.toarray() - - # detects blocks in the block diagonal matrix called "dense" - startlist, endlist = get_blocks(dense) - - nblocks = len(startlist) - # keeps track of the atom movement within the matrix. Needed later - atomlist = np.zeros((len(dense))) - for b in range(0, nblocks): - for i in range(0, len(dense)): - if (i >= startlist[b]) and (i <= endlist[b]): - atomlist[i] = b + 1 - invperm = inv(perm) - atomlistperm = [int(atomlist[i]) for i in invperm] - - # assigns atoms to molecules - blocklist = [] - for b in range(0, nblocks): - atlist = [] # atom indices in the original ordering - for i in range(0, len(atomlistperm)): - if atomlistperm[i] == b + 1: - atlist.append(indices[i]) - blocklist.append(atlist) - return blocklist - -##################### -def merge_atoms(atoms): - labels = [] - coord = [] - for a in atoms: - labels.append(a.label) - coords.append(a.coord) - return labels, coord - -################################# -def compare_atoms(at1, at2, debug: int=0): - if debug > 0: - print("Comparing Atoms") - print(at1) - print(at2) - if (at1.label != at2.label): return False - if hasattr(at1,"charge") and hasattr(at2,"charge"): - if (at1.charge != at2.charge): return False - if hasattr(at1,"spin") and hasattr(at2,"spin"): - if (at1.spin != at2.spin): return False - return True - -################################# -def compare_species(mol1, mol2, debug: int=0): - if debug > 0: - print("Comparing Species") - print(mol1) - print(mol2) - # a pair of species is compared on the basis of: - # 1) the total number of atoms - if (mol1.natoms != mol2.natoms): return False - - # 2) the total number of electrons (as sum of atomic number) - if (mol1.eleccount != mol2.eleccount): return False - - # 3) the number of atoms of each type - if not hasattr(mol1,"element_count"): mol1.set_element_count() - if not hasattr(mol2,"element_count"): mol2.set_element_count() - for kdx, elem in enumerate(mol1.element_count): - if elem != mol2.element_count[kdx]: return False - - # 4) the number of adjacencies between each pair of element types - if not hasattr(mol1,"adj_types"): mol1.set_adj_types() - if not hasattr(mol2,"adj_types"): mol2.set_adj_types() - for kdx, elem in enumerate(mol1.adj_types): - for ldx, elem2 in enumerate(elem): - if elem2 != mol2.adj_types[kdx, ldx]: return False - return True -################################# - - diff --git a/cell2mol/sergi_reconstruct.py b/cell2mol/sergi_reconstruct.py deleted file mode 100644 index 5352c677..00000000 --- a/cell2mol/sergi_reconstruct.py +++ /dev/null @@ -1,697 +0,0 @@ -import itertools -import warnings -from scipy import sparse -from scipy.sparse import csr_matrix -from scipy.sparse.csgraph import reverse_cuthill_mckee - -from Scope.Adapted_from_cell2mol import * -from Scope.Classes_Molecule import specie, molecule, ligand, atom, metal, cell -from Scope.Unit_cell_tools import * -from Scope.Elementdata import ElementData -elemdatabase = ElementData() - -####################################################### -def classify_fragments(blocklist: list, refmoleclist: list, debug: int=0): - init_natoms = 0 - moleclist = [] - fraglist = [] - Hlist = [] - - # Classifies blocks and puts them in 3 bags. (1) Full molecules, (2) partial molecules=fragments, (3) Hydrogens - for idx, block in enumerate(blocklist): - if not hasattr(block,"numH"): block.numH = block.set_element_count()[4] - if (block.natoms == 1) and (block.numH == 1): - block.subtype = "H" - Hlist.append(block) - else: - found = False - for ref in refmoleclist: - issame = compare_species(ref, block) - if issame: - block.subtype = "molecule" - moleclist.append(block) - found = True - if not found: - block.subtype = "fragment" - fraglist.append(block) - - if debug > 0: print(len(blocklist),"Blocks sorted as (Molec, Frag, H):",len(moleclist),len(fraglist),len(Hlist)) - return moleclist, fraglist, Hlist - -####################################################### -def tmatgenerator(centroid, thres=0.40, full=False): - # This function generates a list of the translations that a fragment should undergo depending on the centroid of its fractional coordinates - # For instance, if the centroid of a fragment is at 0.9 in any given axis, it is unlikely that a one-cell-length translation along such axis (resulting in 1.9) would help. - # Also, a fragment right at the center of the unit cell (centroid=(0.5, 0.5, 0.5) is unlikely to require reconstruction - # The threshold defines the window. If thres=0.4, the function will suggest positive translation for any fragment between 0 and 0.4, and negative translation between 0.6 and 1.0. - # If full is asked, then all translations are applied - - tmax = 1 - thres - tmin = thres - - if not full: - tmatrix = [] - tmatrix = additem((0, 0, 0), tmatrix) - - # X positive - if centroid[0] >= tmax: - tmatrix = additem((-1, 0, 0), tmatrix) - if centroid[1] >= tmax: - tmatrix = additem((-1, -1, 0), tmatrix) - tmatrix = additem((0, -1, 0), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((-1, -1, -1), tmatrix) - tmatrix = additem((0, -1, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((-1, -1, 1), tmatrix) - tmatrix = additem((0, -1, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - if centroid[1] <= tmin: - tmatrix = additem((-1, 1, 0), tmatrix) - tmatrix = additem((0, 1, 0), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((-1, 1, -1), tmatrix) - tmatrix = additem((0, 1, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((-1, 1, 1), tmatrix) - tmatrix = additem((0, 1, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((-1, 0, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((-1, 0, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - - if centroid[1] >= tmax: - tmatrix = additem((0, -1, 0), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((0, -1, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((0, -1, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - - if centroid[2] >= tmax: - tmatrix = additem((0, 0, -1), tmatrix) - - if centroid[0] <= tmin: - tmatrix = additem((1, 0, 0), tmatrix) - if centroid[1] <= tmin: - tmatrix = additem((1, 1, 0), tmatrix) - tmatrix = additem((0, 1, 0), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((1, 1, 1), tmatrix) - tmatrix = additem((0, 1, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((1, 1, -1), tmatrix) - tmatrix = additem((0, 1, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[1] >= tmax: - tmatrix = additem((1, -1, 0), tmatrix) - tmatrix = additem((0, -1, 0), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((1, -1, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((1, -1, 1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((1, 0, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((1, 0, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - - if centroid[1] <= tmin: - tmatrix = additem((0, 1, 0), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((0, 1, 1), tmatrix) - tmatrix = additem((0, 0, 1), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((0, 1, -1), tmatrix) - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((0, 0, 1), tmatrix) - - if (centroid[0] > tmin) and (centroid[0] < tmax): - if centroid[1] <= tmin: - tmatrix = additem((0, 1, 0), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((0, 1, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((0, 1, 1), tmatrix) - if centroid[1] >= tmax: - tmatrix = additem((0, -1, 0), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((0, -1, -1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((0, -1, 1), tmatrix) - if centroid[2] <= tmin: - tmatrix = additem((0, 0, 1), tmatrix) - if centroid[1] >= tmax: - tmatrix = additem((0, -1, 1), tmatrix) - if centroid[1] <= tmin: - tmatrix = additem((0, 1, 1), tmatrix) - if centroid[2] >= tmax: - tmatrix = additem((0, 0, -1), tmatrix) - if centroid[1] >= tmax: - tmatrix = additem((0, -1, -1), tmatrix) - if centroid[1] <= tmin: - tmatrix = additem((0, 1, -1), tmatrix) - elif full: - import itertools - - x = [-1, 0, 1] - tmatrix = [p for p in itertools.product(x, repeat=3)] - - tmatrix.sort(key=absolute_value) - - return tmatrix - -####################################################### -def additem(item, vector): - if item not in vector: - vector.append(item) - return vector - -####################################################### -def absolute_value(num): - sum = 0 - for i in num: - sum += np.abs(i) - return abs(sum) - -####################################################### -def assign_subtype(molecule: object, references: list) -> str: - for ref in references: - issame = compare_species(molecule, ref) - if issame: - if ref.iscomplex: return "Complex" - else: return "Other" - # If not in references - if molecule.iscomplex: return "Complex" - else: return "Other" - -####################################################### -def fragments_reconstruct(moleclist: list, fraglist: list, Hlist: list, refmoleclist: list, cellvec: list, factor: float=1.3, metal_factor: float=1.0, debug: int=0): - - Warning = False - # Reconstruct Heavy Fragments - if len(fraglist) > 1: - print("") - print("##############################################") - print(len(fraglist), "molecules submitted to SEQUENTIAL with Heavy") - print("##############################################") - newmols, remfrag = sequential(fraglist, refmoleclist, cellvec, factor, metal_factor, "Heavy", debug) - print(f"{len(newmols)} molecules and {len(remfrag)} fragments out of SEQUENTIAL with Heavy") - moleclist.extend(newmols) - fraglist = [] - fraglist.extend(remfrag) - fraglist.extend(Hlist) - - # For debugging - if debug >= 1: - print(" ") - # Prints molecules after Heavy Fragment Reconstruction - if len(newmols) > 0: - for mol in newmols: - print("Molec reconstructed after Heavy", mol.natoms, mol.formula, mol.type) - else: - print("NO Molecules reconstructed after Heavy") - if len(remfrag) > 0: - for rem in remfrag: - print("Remaining after Heavy", rem.natoms, rem.formula, rem.subtype) - else: - print("NO remaining Molecules after Heavy") - print(" ") - else: - print("Only 0 or 1 heavy fragments. Skipping Heavy") - remfrag = fraglist.copy() - - # Reconstruct Hydrogens with remaining Fragments - if len(remfrag) > 0 and len(Hlist) > 0: - print("") - print("##############################################") - print(len(fraglist), "molecules submitted to sequential with All") - print("##############################################") - finalmols, remfrag = sequential(fraglist, refmoleclist, cellvec, factor, metal_factor, "All", debug) - if len(remfrag) > 0: - Warning = True - for rem in remfrag: - print("Remaining after Hydrogen reconstruction",rem.natoms,rem.formula,rem.subtype) - else: - print("NO remaining Molecules after Hydrogen reconstruction") - Warning = False - print(" ") - else: - if len(remfrag) > 0 and len(Hlist) == 0: - Warning = True - print("There are remaining Fragments and no H in list") - finalmols = [] - remfrag = [] - elif len(remfrag) == 0 and len(Hlist) > 0: - Warning = True - print("There are isolated H atoms in cell") - finalmols = [] - remfrag = [] - elif len(remfrag) == 0 and len(Hlist) == 0: - print("Not necessary to reconstruct Hydrogens") - finalmols = fraglist.copy() # IF not Hidrogen fragments, then is done - remfrag = [] - - return moleclist, finalmols, Warning - -####################################################### -def translate(vector, coords, cellvec): - newcoord = [] - for idx, coord in enumerate(coords): - newx = ( - coord[0] - + vector[0] * cellvec[0][0] - + vector[1] * cellvec[1][0] - + vector[2] * cellvec[2][0] - ) - newy = ( - coord[1] - + vector[0] * cellvec[0][1] - + vector[1] * cellvec[1][1] - + vector[2] * cellvec[2][1] - ) - newz = ( - coord[2] - + vector[0] * cellvec[0][2] - + vector[1] * cellvec[1][2] - + vector[2] * cellvec[2][2] - ) - newcoord.append([float(newx), float(newy), float(newz)]) - return newcoord - -####################################################### -def cart2frac(cartCoords, cellvec): - latCnt = [x[:] for x in [[None] * 3] * 3] - for a in range(3): - for b in range(3): - latCnt[a][b] = cellvec[b][a] - fracCoords = [] - detLatCnt = det3(latCnt) - for i in cartCoords: - aPos = (det3([ - [i[0], latCnt[0][1], latCnt[0][2]], - [i[1], latCnt[1][1], latCnt[1][2]], - [i[2], latCnt[2][1], latCnt[2][2]], - ] - ) - ) / detLatCnt - bPos = ( - det3( - [ - [latCnt[0][0], i[0], latCnt[0][2]], - [latCnt[1][0], i[1], latCnt[1][2]], - [latCnt[2][0], i[2], latCnt[2][2]], - ] - ) - ) / detLatCnt - cPos = ( - det3( - [ - [latCnt[0][0], latCnt[0][1], i[0]], - [latCnt[1][0], latCnt[1][1], i[1]], - [latCnt[2][0], latCnt[2][1], i[2]], - ] - ) - ) / detLatCnt - fracCoords.append([aPos, bPos, cPos]) - return fracCoords - -####################################################### -def sequential(fragmentlist: list, refmoleclist: list, cellvec: list, factor: float=1.3, metal_factor: float=1.0, typ: str="All", debug: int=1): - # Crappy function that controls the reconstruction process. It is called sequential because pairs of fragments are sent one by one. Ideally, a parallel version would be desirable. - # Given a list of fragments(fragmentlist), a list of reference molecules(refmoleclist), and some other minor parameters, the function sends pairs of fragments and evaluates if they... - # ...form a bigger fragment. If so, the bigger fragment is evaluated. If it coincides with one of the molecules in refmoleclist, than it means that it is a full molecule that requires no further work. - # ...if it does not, then it means that requires further reconstruction, and is again introduced in the loop. - # typ is a variable that defines how to combine the fragments. To speed up the process, this function is called twice in main. - # -First, to combine heavy fragments among themselves (typ="Heavy") - # -Second, to combie heavy fragments with H atoms (typ="All") - #:return molecsfoundlist, remainingfragments: lists of molecules and fragments, respectively, saved as objects - - if debug >= 1: print("Entered sequential with", len(fragmentlist), "fragments to reconstruct") - - # Finds How many atoms, at max, can a molecule have. It is used to skip meaningless combinations - maxatoms = 0 - for ref in refmoleclist: - if ref.natoms > maxatoms: - maxatoms = ref.natoms - - molecsfoundlist = [] - remainingfragments = [] - ################################################### - #### INPUT THAT CONTROLS THE ITERATIVE PROCESS #### - ################################################### - threshold_tmat = 0.40 - increase_tmat = 0.20 - fragtoallocate = 0 - Htoallocate = 0 - niter = 1 - maxiter = 3000 - mixsize = 1 - lastiter = 0 - lastitermargin = maxiter - ################################################### - - ################################################### - # Lists (list1 and list2) are created here depending on variable "typ" - ################################################### - for frag in fragmentlist: - if not hasattr(frag,"frac_centroid"): frag.get_centroid() - frag.tmatrix = tmatgenerator(frag.frac_centroid, threshold_tmat) - - remlist = [] - Hlist = [] - for frag in fragmentlist: - if (frag.natoms == 1) and (frag.numH == 1): - frag.subtype = "H" - Hlist.append(frag) - else: - frag.subtype = "Heavy" - remlist.append(frag) - if debug >= 1: print("Found",len(remlist),"and",len(Hlist),"Heavy and Hydrogen fragments, respectively") - if typ == "Heavy": - list1 = remlist.copy() - list2 = remlist.copy() - elif typ == "All": - list1 = remlist.copy() - list2 = Hlist.copy() - - ## Initial Fragment indices for each list - Frag1_toallocate = 0 - Frag2_toallocate = 0 - - ################# - ### MAIN LOOP ### - ################# - while (len(list1) > 0) and (len(list2) > 0): - - ################# - # This part decides which molecules in the two lists are sent to combine - ################# - STOP = False - Last_Attempt = False - - if niter > 1: Frag2_toallocate += 1 - if (Frag2_toallocate > len(list2) - 1): # Reaches the end of the second list. Restarts 2nd and advances 1st - Frag1_toallocate += 1 - Frag2_toallocate = 0 - if (Frag1_toallocate > len(list1) - 1): # Reaches the end of the first list. Restarts both - Frag1_toallocate = 0 - Frag2_toallocate = 0 - if typ == "Heavy": - if Frag1_toallocate == Frag2_toallocate: Frag2_toallocate += 1 - if typ == "Heavy": - if (Frag1_toallocate >= len(list1) - 1) and (Frag2_toallocate >= len(list2) - 2): STOP = True - elif typ == "All": - if (Frag1_toallocate >= len(list1) - 1) and (Frag2_toallocate >= len(list2) - 1): STOP = True - ################# - - ################# - # This part handles sublist, keeplist1 and keeplist2. They are necessary to handle the results of the function "Combine", which is called later. - ################# - if debug >= 1: print(" ") - if debug >= 1: print("Fragments to allocate:",Frag1_toallocate,Frag2_toallocate,"out of",len(list1)-1,len(list2)-1) - sublist = [] - keeplist1 = [] - keeplist2 = [] - if typ == "Heavy": - for i in range(0, len(list1)): - if i == Frag1_toallocate: sublist.append(list1[i]) - elif i != Frag2_toallocate: keeplist1.append(list1[i]) - for i in range(0, len(list2)): - if i == Frag2_toallocate: sublist.append(list2[i]) - elif i != Frag1_toallocate: keeplist2.append(list2[i]) - elif typ == "All": - for i in range(0, len(list1)): - if i == Frag1_toallocate: sublist.append(list1[i]) - elif i != Frag1_toallocate: keeplist1.append(list1[i]) - for i in range(0, len(list2)): - if i == Frag2_toallocate: sublist.append(list2[i]) - elif i != Frag2_toallocate: keeplist2.append(list2[i]) - - ################# - # This part evaluates that the fragments that are going to be combined, can form one of the reference molecules. The resulting number of atoms is used. - ################# - if list1[Frag1_toallocate].natoms + list2[Frag2_toallocate].natoms > maxatoms: - if debug >= 1: print("SEQUENTIAL",typ,"SKIPPED",list1[Frag1_toallocate].natoms,"and",list2[Frag2_toallocate].natoms) - else: - if debug >= 1: print("SEQUENTIAL",typ,"iteration",niter,"with",len(list1),"and",len(list2),"Remaining in each list") - if debug >= 1: print("SEQUENTIAL",typ,"sending",list1[Frag1_toallocate].labels,"and",list2[Frag2_toallocate].labels,"to combine") - - ################# - # Here, the function "combine" is called. It will try cell translations of one fragment, and check whether it eventually combines with the second fragment into either a bigger fragment or a molecule - ################# - goodlist, avglist, badlist = combine(sublist, refmoleclist, cellvec, threshold_tmat, factor, metal_factor, debug=debug) - - ################# - # This part handles the results of combine - ################# - if (len(goodlist) > 0) or (len(avglist) > 0): - # it means that the function combine worked. Thus, it restarts the fragments to allocate - lastiter = niter - Frag1_toallocate = 0 - Frag2_toallocate = 0 - - # Adds the found molecule to the appropriate list - for g in goodlist: - molecsfoundlist.append(g) - if debug >= 1: print(f"SEQUENTIAL: Found molecule {g.formula}") - - # Reconstructs list1 and list2 - list1 = [] - list2 = [] - for a in avglist: - list1.append(a) - if typ == "Heavy": list2.append(a) - - if len(badlist) > 1: - if typ == "Heavy": - list1.append(badlist[0]) - list1.append(badlist[1]) - list2.append(badlist[0]) - list2.append(badlist[1]) - elif typ != "Heavy": - list1.append(badlist[0]) - list2.append(badlist[1]) - for k1 in keeplist1: - list1.append(k1) - for k2 in keeplist2: - list2.append(k2) - - if len(list1) + len(list2) == 0: - print("FINISHED succesfully") - break - - if typ == "Heavy": - if len(list1) == 1: - for l in list1: - remainingfragments.append(l) - print("FINISHED with Remaining Fragment") - break - - if (len(list1) == 0) and (len(list2) == 0): - print("FINISHED succesfully") - break - - ################# - # This part decides whether the WHILE loop must finish. - ################# - niter += 1 - if niter > maxiter: STOP = True - if niter > (lastiter + lastitermargin): STOP = True - if not STOP: continue - else: - if (threshold_tmat <= (1.0 - increase_tmat)) or Last_Attempt: - STOP = False - threshold_tmat += increase_tmat - if threshold_tmat >= 1: Last_Attempt = True - if not Last_Attempt: - maxsize = 0 - for l in list1: - l.tmatrix = tmatgenerator(l.centroid, threshold_tmat) - if len(l.tmatrix) > maxsize: maxsize = len(l.tmatrix) - for l in list2: - l.tmatrix = tmatgenerator(l.centroid, threshold_tmat) - if len(l.tmatrix) > maxsize: maxsize = len(l.tmatrix) - if debug >= 1: print(" Increased Threshold_tmat. Now:", threshold_tmat) - if debug >= 1: print(" Maxsize of the translation matrix is=", maxsize) - elif Last_Attempt: - for l in list1: - l.tmatrix = tmatgenerator(l.centroid, threshold_tmat, full=True) - for l in list2: - l.tmatrix = tmatgenerator(l.centroid, threshold_tmat, full=True) - if debug >= 1: print("Trying Full Tmatrix for all Items in list") - - niter = 1 - Frag1_toallocate = 0 - Frag2_toallocate = 0 - else: - for l in list1: - if debug >= 1: print("Sequential: list1 end:", l.labels) - remainingfragments.append(l) - for l in list2: - if typ == "All" and debug >= 1: print("Sequential: list2 end:", l.labels) - if typ == "All": remainingfragments.append(l) - break - - return molecsfoundlist, remainingfragments - -####################################################### -def combine(tobemerged: list, references: list, cellvec: list, threshold_tmat: float, cov_factor: float, metal_factor: float, debug: int=0): - goodlist = [] ## List of molecules coming from the two fragments received - avglist = [] ## List of bigger fragments coming from the two fragments received - badlist = [] ## List of fragments as they entered the function - - ## Merges the coordinates of both fragments, and finds species - newmolec = merge_fragments(tobemerged, references, cellvec, cov_factor, metal_factor, debug=debug) - if newmolec is not None and debug >= 1: print("COMBINE. received molecule:", newmolec, "from merge fragments") - - ## Steric Clashes, or more than one fragment retrieved - if newmolec is None: - badlist.append(tobemerged[0]) - badlist.append(tobemerged[1]) - - ## Single specie retrieved - if newmolec is not None: - newmolec.get_fractional_coord(cellvec) - newmolec.get_centroid() - newmolec.tmatrix = tmatgenerator(newmolec.frac_centroid, threshold_tmat) - - found = False - for ref in references: - if not found: - issame = compare_species(newmolec, ref) - if issame: ## Then is a molecule that appears in the reference list - found = True - newmolec.subtype = ref.subtype - goodlist.append(newmolec) - if debug >= 1: print("COMBINE: Fragment",newmolec.formula,"added to goodlist") - if not found: ## Then it is a fragment. A bigger one, but still a fragment - newmolec.subtype = "Rec. Fragment" - avglist.append(newmolec) - if debug >= 1: print("COMBINE: Fragment", newmolec.formula, "added to avglist") - - return goodlist, avglist, badlist - -####################################################### -def merge_fragments(frags: list, refs: list, cellvec: list, cov_factor: float=1.3, metal_factor: float=1.0, debug: int=0): - # finds biggest fragment and keeps it in the original cell - sizes = [] - for f in frags: - size = f.natoms - sizes.append(size) - keep_idx = np.argmax(sizes) - if keep_idx == 0: move_idx = 1 - elif keep_idx == 1: move_idx = 0 - keep_frag = frags[keep_idx] - move_frag = frags[move_idx] - if debug > 0: print("MERGE_FRAGMENTS: keep_idx", keep_idx) - if debug > 0: print("MERGE_FRAGMENTS: move_idx", move_idx) - if debug > 0: print("MERGE_FRAGMENTS: move_frag.tmatrix", move_frag.tmatrix) - - #applytranspose = list(itertools.product(*move_frag.tmatrix)) - #print("applytranspose", applytranspose) - if len(move_frag.tmatrix) == 0: return None - for t in move_frag.tmatrix: - if debug > 0: print("MERGE_FRAGMENTS: translation", t) - ## Applies Translations and each time, it checks if a bigger molecule is formed - ## meaning that the translation was successful - reclabels = [] - reclabels.extend(keep_frag.labels) - reclabels.extend(move_frag.labels) - reccoord = [] - reccoord.extend(keep_frag.coord) - if t == (0, 0, 0): reccoord.extend(move_frag.coord) - else: reccoord.extend(translate(t, move_frag.coord, cellvec)) - - ## Evaluate if we get only one fragment. If so, we're ok: - numspecs = count_species(reclabels, reccoord, cov_factor=cov_factor, debug=debug) - if debug > 0: print("MERGE_FRAGMENTS: count_species found", numspecs) - if numspecs != 1: continue - blocklist = split_species(reclabels, reccoord, cov_factor=cov_factor, debug=debug) - if blocklist is None: continue - else: - if len(blocklist) != 1: continue - if len(blocklist) == 1: - newmolec = molecule(reclabels, reccoord) - newmolec.set_adjacency_parameters(cov_factor, metal_factor) - newmolec.set_adj_types() - newmolec.set_element_count() - newmolec.get_adjmatrix() - newmolec.get_centroid() - return newmolec - return None - -#################################### -def getconec(labels: list, pos: list, factor: float, radii=None): - status = 1 # good molecule, no clashes yet - clash = 0.3 - natoms = len(labels) - conmat = np.zeros((natoms, natoms)) - connec = np.zeros((natoms)) - mconmat = np.zeros((natoms, natoms)) - mconnec = np.zeros((natoms)) - # Sometimes argument radii np.ndarry, or list - with warnings.catch_warnings(): - warnings.simplefilter(action="ignore", category=FutureWarning) - if radii is None: radii = getradii(labels) - - for i in range(0, natoms - 1): - for j in range(i, natoms): - if i != j: - # print(i,j) - a = np.array(pos[i]) - b = np.array(pos[j]) - dist = np.linalg.norm(a - b) - thres = (radii[i] + radii[j]) * factor - if dist <= clash: - status = 0 # invalid molecule - print("GETCONEC: Distance", dist, "smaller than clash for atoms", i, j) - elif dist <= thres: - conmat[i, j] = 1 - conmat[j, i] = 1 - if ( - elemdatabase.elementblock[labels[i]] == "d" - or elemdatabase.elementblock[labels[i]] == "f" - or elemdatabase.elementblock[labels[j]] == "d" - or elemdatabase.elementblock[labels[j]] == "f" - ): - mconmat[i, j] = 1 - mconmat[j, i] = 1 - - for i in range(0, natoms): - connec[i] = np.sum(conmat[i, :]) - mconnec[i] = np.sum(mconmat[i, :]) - - conmat = conmat.astype(int) - mconmat = mconmat.astype(int) - connec = connec.astype(int) - mconnec = mconnec.astype(int) - # return status, np.array(conmat), np.array(connec), np.array(mconmat), np.array(mconnec) - return status, conmat, connec, mconmat, mconnec - -####################################################### -def det3(mat): - return ( - (mat[0][0] * mat[1][1] * mat[2][2]) - + (mat[0][1] * mat[1][2] * mat[2][0]) - + (mat[0][2] * mat[1][0] * mat[2][1]) - - (mat[0][2] * mat[1][1] * mat[2][0]) - - (mat[0][1] * mat[1][0] * mat[2][2]) - - (mat[0][0] * mat[1][2] * mat[2][1])) - -################################ -def getradii(labels: list) -> np.ndarray: - radii = [] - for l in labels: - radii.append(elemdatabase.CovalentRadius2[l]) - return np.array(radii) -################################ diff --git a/cell2mol/tmcharge_common.py b/cell2mol/tmcharge_common.py deleted file mode 100755 index 6f504318..00000000 --- a/cell2mol/tmcharge_common.py +++ /dev/null @@ -1,1085 +0,0 @@ -#!/usr/bin/env python - -import warnings -import numpy as np -from scipy import sparse -from scipy.sparse import csr_matrix -from scipy.sparse.csgraph import reverse_cuthill_mckee -from cell2mol.elementdata import ElementData -from typing import Tuple - -elemdatabase = ElementData() - -################################ -def labels2formula(labels): - elems = elemdatabase.elementnr.keys() - formula=[] - for z in elems: - nz = labels.count(z) - if nz > 1: - formula.append(f"{z}{nz}-") - if nz == 1: - formula.append(f"{z}-") - formula = ''.join(formula)[:-1] - return formula - -################################ -def getelementcount(labels: list) -> np.ndarray: - elems = elemdatabase.elementnr.keys() - times = np.zeros((len(elems)),dtype=int) - for l in labels: - for jdx, elem in enumerate(elems): - if l == elem: - times[jdx] += 1 - return times - - -################################ -def getHvcount(labels: list) -> np.ndarray: - elems = elemdatabase.elementnr.keys() - times = np.zeros((len(elems)),dtype=int) - for l in labels: - if l != "H": - for jdx, elem in enumerate(elems): - if l == elem: - times[jdx] += 1 - return times - - -################################ -def get_adjacency_types(label: list, conmat: np.ndarray) -> np.ndarray: - elems = elemdatabase.elementnr.keys() - bondtypes = np.zeros((len(elems), len(elems)),dtype=int) - #bondtypes = np.zeros((len(elems), len(elems))).astype(int) - natoms = len(label) - found = np.zeros((natoms, natoms)) - - for i in range(0, natoms): - for j in range(i, natoms): - if i != j: - if (conmat[i, j] == 1) and (found[i, j] == 0): - for k, elem1 in enumerate(elems): - if label[i] == elem1: - for l, elem2 in enumerate(elems): - if label[j] == elem2: - bondtypes[k, l] += 1 - if elem1 != elem2: - bondtypes[l, k] += 1 - found[i, j] = 1 - found[j, i] = 1 - break - break - return bondtypes - - -################################ -def checkchemistry(molecule: object, references: list, typ: str="Max") -> int: - - elems = elemdatabase.elementnr.keys() - maxval = np.zeros((len(references[0].adjtypes), len(references[0].adjtypes))) - - for i in range(0, len(references[0].adjtypes)): - for j in range(0, len(references[0].adjtypes)): - lst = [] - for ref in references: - lst.append(ref.adjtypes[i, j]) - maxval[i, j] = np.max(lst) - - status = 1 # Good - if ( - typ == "Max" - ): # the bonds[j,k] of the molecule cannot be larger than the maximum within references - for i in range(0, len(molecule.adjtypes)): - for j in range(0, len(molecule.adjtypes)): - if molecule.adjtypes[i, j] > 0: - if molecule.adjtypes[i, j] > maxval[i, j]: - status = 0 # bad - return status - - -################################ -def getradii(labels: list) -> np.ndarray: - radii = [] - for l in labels: - radii.append(elemdatabase.CovalentRadius2[l]) - return np.array(radii) - - -################################ -def getcentroid(frac: list) -> list: - natoms = len(frac) - x = 0 - y = 0 - z = 0 - for idx, l in enumerate(frac): - x += frac[idx][0] - y += frac[idx][1] - z += frac[idx][2] - centroid = [float(x / natoms), float(y / natoms), float(z / natoms)] - return centroid - - -################################ -def extract_from_matrix(entrylist: list, old_array: np.ndarray, dimension: int=2) -> np.ndarray: - - length = len(entrylist) - - if dimension == 2: - new_array = np.empty((length, length)) - for idx, row in enumerate(entrylist): - for jdx, col in enumerate(entrylist): - new_array[idx, jdx] = old_array[row][col] - - elif dimension == 1: - new_array = np.empty((length)) - for idx, val in enumerate(entrylist): - new_array[idx] = old_array[val] - - return new_array - -################################ -covalent_factor_for_metal = { - "H": 1.30, - "D": 1.30, - "He": 1.30, - "Li": 1.30, - "Be": 1.30, - "B": 1.15, - "C": 1.30, - "N": 1.25, - "O": 1.25, - "F": 1.15, - "Ne": 1.30, - "Na": 1.30, - "Mg": 1.30, - "Al": 1.30, - "Si": 1.10, - "P": 1.30, - "S": 1.25, - "Cl": 1.25, - "Ar": 1.30, - "K": 1.30, - "Ca": 1.30, - "Sc": 1.30, - "Ti": 1.30, - "V": 1.30, - "Cr": 1.30, - "Mn": 1.30, - "Fe": 1.30, - "Co": 1.30, - "Ni": 1.30, - "Cu": 1.30, - "Zn": 1.30, - "Ga": 1.30, - "Ge": 1.30, - "As": 1.15, - "Se": 1.15, - "Br": 1.25, - "Kr": 1.30, - "Rb": 1.30, - "Sr": 1.30, - "Y": 1.30, - "Zr": 1.30, - "Nb": 1.30, - "Mo": 1.30, - "Tc": 1.30, - "Ru": 1.30, - "Rh": 1.30, - "Pd": 1.30, - "Ag": 1.30, - "Cd": 1.30, - "In": 1.30, - "Sn": 1.30, - "Sb": 1.15, - "Te": 1.15, - "I": 1.25, - "Xe": 1.30, - "Cs": 1.30, - "Ba": 1.30, - "La": 1.30, - "Ce": 1.30, - "Pr": 1.30, - "Nd": 1.30, - "Pm": 1.30, - "Sm": 1.30, - "Eu": 1.30, - "Gd": 1.30, - "Tb": 1.30, - "Dy": 1.30, - "Ho": 1.30, - "Er": 1.30, - "Tm": 1.30, - "Yb": 1.30, - "Lu": 1.30, - "Hf": 1.30, - "Ta": 1.30, - "W": 1.30, - "Re": 1.30, - "Os": 1.30, - "Ir": 1.30, - "Pt": 1.30, - "Au": 1.30, - "Hg": 1.30, - "Tl": 1.30, - "Pb": 1.30, - "Bi": 1.30, - "Po": 1.30, - "At": 1.30, - "Rn": 1.30, - "Fr": 1.30, - "Ra": 1.30, - "Ac": 1.30, - "Th": 1.30, - "Pa": 1.30, - "U": 1.30, - "Np": 1.30, - "Pu": 1.30, - "Am": 1.30, - "Cm": 1.30, - "Bk": 1.30, - "Cf": 1.30, - "Es": 1.30, - "Fm": 1.30, - "Md": 1.30, - "No": 1.30, - "Lr": 1.30, - "Rf": 1.30, - "Db": 1.30, - "Sg": 1.30, - "Bh": 1.30, - "Hs": 1.30, - "Mt": 1.30, -} - -def get_thres_from_two_atoms(label_i, label_j, factor=1.3, debug=0): - - radii_i = elemdatabase.CovalentRadius2[label_i] - radii_j = elemdatabase.CovalentRadius2[label_j] - - if ( - elemdatabase.elementblock[label_i] != "d" - and elemdatabase.elementblock[label_i] != "f" - and elemdatabase.elementblock[label_j] != "d" - and elemdatabase.elementblock[label_j] != "f" - ): - - thres = (radii_i + radii_j) * factor - thres = round(thres, 3) - - if debug >=2 : - print(f"{label_i} : {radii_i}, {label_j} : {radii_j}, {factor}, {thres=}") - elif ( - elemdatabase.elementblock[label_i] == "d" - and elemdatabase.elementblock[label_i] == "f" - and elemdatabase.elementblock[label_j] == "d" - and elemdatabase.elementblock[label_j] == "f" - ): - factor_i = covalent_factor_for_metal[label_i] - factor_j = covalent_factor_for_metal[label_j] - - if factor_i < factor_j : - new_factor = factor_i - - elif factor_i == factor_j : - new_factor = factor_i - - else : - new_factor = factor_j - - thres = (radii_i + radii_j) * new_factor - thres = round(thres, 3) - - if debug >=2 : - print(f"{label_i} : {radii_i} ({factor_i}), {label_j} : {radii_j} ({factor_j}), {new_factor=}, {thres=}") - else : - - thres = (radii_i + radii_j) * factor - thres = round(thres, 3) - - if debug >=2 : - print(f"{label_i} : {radii_i}, {label_j} : {radii_j}, {factor}, {thres=}") - - return thres - -#################################### -def getconec(labels: list, pos: list, factor: float, radii="default") -> Tuple[int, list, list, list, list]: - status = 1 # good molecule, no clashes yet - clash = 0.3 - natoms = len(labels) - conmat = np.zeros((natoms, natoms)) - connec = np.zeros((natoms)) - mconmat = np.zeros((natoms, natoms)) - mconnec = np.zeros((natoms)) - - - covalent_factor_for_metal = { - "H": 1.30, - "D": 1.30, - "He": 1.30, - "Li": 1.30, - "Be": 1.30, - "B": 1.15, - "C": 1.30, - "N": 1.25, - "O": 1.25, - "F": 1.15, - "Ne": 1.30, - "Na": 1.30, - "Mg": 1.30, - "Al": 1.30, - "Si": 1.10, - "P": 1.30, - "S": 1.25, - "Cl": 1.25, - "Ar": 1.30, - "K": 1.30, - "Ca": 1.30, - "Sc": 1.30, - "Ti": 1.30, - "V": 1.30, - "Cr": 1.30, - "Mn": 1.30, - "Fe": 1.30, - "Co": 1.30, - "Ni": 1.30, - "Cu": 1.30, - "Zn": 1.30, - "Ga": 1.30, - "Ge": 1.30, - "As": 1.15, - "Se": 1.15, - "Br": 1.25, - "Kr": 1.30, - "Rb": 1.30, - "Sr": 1.30, - "Y": 1.30, - "Zr": 1.30, - "Nb": 1.30, - "Mo": 1.30, - "Tc": 1.30, - "Ru": 1.30, - "Rh": 1.30, - "Pd": 1.30, - "Ag": 1.30, - "Cd": 1.30, - "In": 1.30, - "Sn": 1.30, - "Sb": 1.15, - "Te": 1.15, - "I": 1.25, - "Xe": 1.30, - "Cs": 1.30, - "Ba": 1.30, - "La": 1.30, - "Ce": 1.30, - "Pr": 1.30, - "Nd": 1.30, - "Pm": 1.30, - "Sm": 1.30, - "Eu": 1.30, - "Gd": 1.30, - "Tb": 1.30, - "Dy": 1.30, - "Ho": 1.30, - "Er": 1.30, - "Tm": 1.30, - "Yb": 1.30, - "Lu": 1.30, - "Hf": 1.30, - "Ta": 1.30, - "W": 1.30, - "Re": 1.30, - "Os": 1.30, - "Ir": 1.30, - "Pt": 1.30, - "Au": 1.30, - "Hg": 1.30, - "Tl": 1.30, - "Pb": 1.30, - "Bi": 1.30, - "Po": 1.30, - "At": 1.30, - "Rn": 1.30, - "Fr": 1.30, - "Ra": 1.30, - "Ac": 1.30, - "Th": 1.30, - "Pa": 1.30, - "U": 1.30, - "Np": 1.30, - "Pu": 1.30, - "Am": 1.30, - "Cm": 1.30, - "Bk": 1.30, - "Cf": 1.30, - "Es": 1.30, - "Fm": 1.30, - "Md": 1.30, - "No": 1.30, - "Lr": 1.30, - "Rf": 1.30, - "Db": 1.30, - "Sg": 1.30, - "Bh": 1.30, - "Hs": 1.30, - "Mt": 1.30, - } - - - # Sometimes argument radii np.ndarry, or list - with warnings.catch_warnings(): - warnings.simplefilter(action="ignore", category=FutureWarning) - if radii == "default": - radii = getradii(labels) - - for i in range(0, natoms - 1): - for j in range(i, natoms): - if i != j: - # print(i,j) - a = np.array(pos[i]) - b = np.array(pos[j]) - dist = np.linalg.norm(a - b) - - if ( - elemdatabase.elementblock[labels[i]] != "d" - and elemdatabase.elementblock[labels[i]] != "f" - and elemdatabase.elementblock[labels[j]] != "d" - and elemdatabase.elementblock[labels[j]] != "f" - ): - - thres = (radii[i] + radii[j]) * factor - - elif ( - (elemdatabase.elementblock[labels[i]] == "d" or elemdatabase.elementblock[labels[i]] == "f" ) - and - (elemdatabase.elementblock[labels[j]] == "d" or elemdatabase.elementblock[labels[j]] == "f" ) - ): - thres = (radii[i] + radii[j]) * 1.1 # factor for metal-metal bond. Temporary 1.1 - #print(labels[i], labels[j], radii[i] , radii[j], dist, thres) - - elif ( - elemdatabase.elementblock[labels[i]] == "d" - or elemdatabase.elementblock[labels[i]] == "f" - or elemdatabase.elementblock[labels[j]] == "d" - or elemdatabase.elementblock[labels[j]] == "f" - ): - factor_i = covalent_factor_for_metal[labels[i]] - factor_j = covalent_factor_for_metal[labels[j]] - - if factor_i < factor_j : - new_factor = factor_i - - elif factor_i == factor_j : - new_factor = factor_i - - else : - new_factor = factor_j - #print(factor_i, factor_j, new_factor, labels[i], labels[j]) - thres = (radii[i] + radii[j]) * new_factor - - if dist <= clash: - status = 0 # invalid molecule - print("GETCONEC: Distance", dist, "smaller than clash for atoms", i, j) - elif dist <= thres: - conmat[i, j] = 1 - conmat[j, i] = 1 - if ( - elemdatabase.elementblock[labels[i]] == "d" - or elemdatabase.elementblock[labels[i]] == "f" - or elemdatabase.elementblock[labels[j]] == "d" - or elemdatabase.elementblock[labels[j]] == "f" - ): - mconmat[i, j] = 1 - mconmat[j, i] = 1 - - for i in range(0, natoms): - connec[i] = np.sum(conmat[i, :]) - mconnec[i] = np.sum(mconmat[i, :]) - - conmat = conmat.astype(int) - mconmat = mconmat.astype(int) - connec = connec.astype(int) - mconnec = mconnec.astype(int) - # return status, np.array(conmat), np.array(connec), np.array(mconmat), np.array(mconnec) - return status, conmat, connec, mconmat, mconnec - - -#################################### -def getconec_original(labels: list, pos: list, factor: float, radii="default") -> Tuple[int, list, list, list, list]: - status = 1 # good molecule, no clashes yet - clash = 0.3 - natoms = len(labels) - conmat = np.zeros((natoms, natoms)) - connec = np.zeros((natoms)) - mconmat = np.zeros((natoms, natoms)) - mconnec = np.zeros((natoms)) - # Sometimes argument radii np.ndarry, or list - with warnings.catch_warnings(): - warnings.simplefilter(action="ignore", category=FutureWarning) - if radii == "default": - radii = getradii(labels) - - for i in range(0, natoms - 1): - for j in range(i, natoms): - if i != j: - # print(i,j) - a = np.array(pos[i]) - b = np.array(pos[j]) - dist = np.linalg.norm(a - b) - thres = (radii[i] + radii[j]) * factor - if dist <= clash: - status = 0 # invalid molecule - print("GETCONEC: Distance", dist, "smaller than clash for atoms", i, j) - elif dist <= thres: - conmat[i, j] = 1 - conmat[j, i] = 1 - if ( - elemdatabase.elementblock[labels[i]] == "d" - or elemdatabase.elementblock[labels[i]] == "f" - or elemdatabase.elementblock[labels[j]] == "d" - or elemdatabase.elementblock[labels[j]] == "f" - ): - mconmat[i, j] = 1 - mconmat[j, i] = 1 - - for i in range(0, natoms): - connec[i] = np.sum(conmat[i, :]) - mconnec[i] = np.sum(mconmat[i, :]) - - conmat = conmat.astype(int) - mconmat = mconmat.astype(int) - connec = connec.astype(int) - mconnec = mconnec.astype(int) - # return status, np.array(conmat), np.array(connec), np.array(mconmat), np.array(mconnec) - return status, conmat, connec, mconmat, mconnec - -def inv(perm: list) -> list: - inverse = [0] * len(perm) - for i, p in enumerate(perm): - inverse[p] = i - return inverse - - -def getblocks(matrix: np.ndarray) -> Tuple[list, list]: - # retrieves the blocks from a diagonal block matrix - startlist = [] - endlist = [] - - start = 1 - pos = start - posold = 0 - blockcount = 0 - j = 1 - while j < len(matrix): - if matrix[pos - 1, j] != 0.0: - pos = j + 1 - if j == len(matrix) - 1: - blockcount = blockcount + 1 - startlist.append(posold) - endlist.append(pos - 1) - posold = pos - pos = pos + 1 - j = pos - 1 - continue - j += 1 - - if (blockcount == 0) and ( - len(matrix) == 1 - ): # if a 1x1 matrix is provided, it then finds 1 block - startlist.append(0) - endlist.append(0) - - return startlist, endlist - - -def find_groups_within_ligand(ligand: object) -> list: - - debug = 0 - if debug >= 1: - print(f"DOING LIGAND: {ligand.labels}") - if debug >= 1: - print(f"FIND GROUPS received conmat shape: {ligand.conmat.shape}") - if debug >= 2: - print(f"FIND GROUPS received conmat: {ligand.conmat}") - - connected_atoms = [] - unconnected_atoms = [] - cutmolec = [] - for idx, a in enumerate(ligand.atoms): - if a.mconnec >= 1: - connected_atoms.append(idx) - cutmolec.append([a.label, idx]) - elif a.mconnec == 0: - unconnected_atoms.append(idx) - - rowless = np.delete(ligand.conmat, unconnected_atoms, 0) - columnless = np.delete(rowless, unconnected_atoms, axis=1) - - if debug >= 1: - print(f"FIND GROUPS: connected are: {connected_atoms}") - if debug >= 1: - print(f"FIND GROUPS: unconnected are: {unconnected_atoms}") - - # Regenerates the truncated lig.connec - connec = [] - for idx, c in enumerate(ligand.connec): - if idx in connected_atoms: - connec.append(ligand.connec[idx]) - - # Does the - degree = np.diag(connec) - lap = columnless - degree - - # Copied from split_complex - graph = csr_matrix(lap) - perm = reverse_cuthill_mckee(graph) - gp1 = graph[perm, :] - gp2 = gp1[:, perm] - dense = gp2.toarray() - - startlist, endlist = getblocks(dense) - ngroups = len(startlist) - - atomlist = np.zeros((len(dense))) - for b in range(0, ngroups): - for i in range(0, len(dense)): - if (i >= startlist[b]) and (i <= endlist[b]): - atomlist[i] = b + 1 - invperm = inv(perm) - atomlistperm = [int(atomlist[i]) for i in invperm] - - if debug >= 1: - print(f"FIND GROUPS: the {ngroups} groups start at: {startlist}") - - groups = [] - for b in range(0, ngroups): - atlist = [] - for i in range(0, len(atomlistperm)): - if atomlistperm[i] == b + 1: - atlist.append(cutmolec[i][1]) - groups.append(atlist) - - if debug >= 1: - print(f"FIND GROUPS finds {ngroups} as {groups}") - - return groups - -####################################################### -def find_closest_metal(atom: object, metalist: list, debug: int=0) -> Tuple[np.ndarray, np.ndarray, list]: - - apos = np.array(atom.coord) - dist = [] - for tm in metalist: - bpos = np.array(tm.coord) - dist.append(np.linalg.norm(apos - bpos)) - - # finds the closest Metal Atom (tgt) - tgt = np.argmin(dist) - return tgt, apos, dist[tgt] - -############################ CLASSES ######################### - -############### -### ATOM ###### -############### -class atom(object): - def __init__(self, index: int, label: str, coord: list, radii: float) -> None: - self.version = "V1.0" - self.index = index - self.label = label - self.coord = coord - self.frac = [] - self.atnum = elemdatabase.elementnr[label] - self.val = elemdatabase.valenceelectrons[label] # currently not used - self.weight = elemdatabase.elementweight[label] # currently not used - self.block = elemdatabase.elementblock[label] - self.radii = radii - - # Adjacency part is simultaneously to creating the ligand or molecule object - ### Changed in V14 - def adjacencies(self, conmat: np.ndarray, mconmat: np.ndarray, type: str="Molecule") -> None: - self.adjacency = [] - self.metal_adjacency = [] - - self.connec = np.sum(conmat) - if conmat.shape: - for idx, c in enumerate(conmat): - if c >= 1: - self.adjacency.append(idx) - else: - self.adjacency.append(conmat) - - if type == "Molecule": - self.mconnec = np.sum(mconmat) - if mconmat.shape: - for idx, c in enumerate(mconmat): - if c >= 1: - self.metal_adjacency.append(idx) - else: - self.adjacency.append(conmat) - - elif type == "Ligand" or type == "Metal": - self.mconnec = mconmat # this has to be improved, now it only receives a number, should receive a vector as above for "molecule" - - # Bonds part is created after the formal charge for each molecule/ligand/metal is decided - def bonds(self, start: list, end: list, order: list) -> None: - self.bond = [] - self.bond_start_idx = [] - self.bond_end_idx = [] - self.bond_order = [] - - for a in start: - self.bond_start_idx.append(a) - for b in end: - self.bond_end_idx.append(b) - for c in order: - self.bond_order.append(c) - - for group in zip(start, end, order): - self.bond.append(group) - - self.nbonds = len(self.bond) - self.totbondorder = np.sum(self.bond_order) - - def atom_charge(self, charge: int) -> None: - self.charge = charge - -############### -### MOLECULE ## -############### -class molecule(object): - def __init__(self, name: str, atlist: list, labels: list, coord: list, radii:list) -> None: - self.version = "V1.0" - self.refcode = "" - self.name = name - self.atlist = atlist - self.labels = labels - self.coord = coord - self.radii = radii - self.formula = labels2formula(labels) - self.occurrence = 0 # How many times the molecule appears in a unit cell - - self.natoms = len(atlist) - self.elemcountvec = getelementcount(labels) - self.Hvcountvec = getHvcount(labels) - - self.frac = [] - self.centroid = [] - self.tmatrix = [] - - # Creates Atoms - self.atnums = [] - self.atoms = [] - for idx, l in enumerate(self.labels): - newatom = atom(idx, l, self.coord[idx], self.radii[idx]) - self.atnums.append(newatom.atnum) - self.atoms.append(newatom) - - self.type = "Other" - self.eleccount = 0 - self.numH = 0 - for a in self.atoms: - if a.atnum == 1: - self.numH += 1 - self.eleccount += a.atnum - if (a.block == "d") or (a.block == "f"): - self.type = "Complex" - - if self.type == "Complex": - self.ligandlist = [] - self.metalist = [] - self.hapticity = False # V13 - self.hapttype = [] # V13 - - # Lists of potentially good variables for this molecule - self.poscharge = [] - self.posatcharge = [] - self.posobjlist = [] - self.posspin = [] - self.possmiles = [] - - # Stores the covalentradii factor and metal factor that were used to generate the molecule - def information(self, factor: float, metal_factor: float) -> None: - self.factor = factor - self.metal_factor = metal_factor - - # Actual variables for the molecule in the crystal where it comes from: - def charge(self, atcharge: np.ndarray, totcharge: int, rdkit_mol: list, smiles: list) -> None: - self.atcharge = atcharge - self.totcharge = totcharge - self.smiles_with_H = " " # If "Complex", it will be a list of ligand smiles - self.smiles = smiles # If "Complex", it will be a list of ligand smiles - self.rdkit_mol = rdkit_mol # If "Complex", it is an empty list - - if self.type != "Complex": - for idx, a in enumerate(self.atoms): - a.atom_charge(self.atcharge[idx]) - - # Spin State Variables - def magnetism(self, spin: int) -> None: - self.spin = spin - - def ml_prediction(self, spin_rf: int) -> None: - self.spin_rf = spin_rf - - # Connectivity = Adjacency Matrix. Potentially expandable to Include Bond Types - def adjacencies(self, conmat: np.ndarray, mconmat: np.ndarray) -> None: - self.conmat = conmat - self.mconmat = mconmat - self.connec = np.zeros((self.natoms)) - self.mconnec = np.zeros((self.natoms)) - for i in range(0, self.natoms): - self.connec[i] = np.sum(self.conmat[i, :]) - self.mconnec[i] = np.sum(self.mconmat[i, :]) - - self.totconnec = int(np.sum(self.connec) / 2) - self.totmconnec = int(np.sum(self.mconnec) / 2) - self.adjtypes = get_adjacency_types(self.labels, self.conmat) # V14 - # self.nbonds = int(np.sum(self.bonds)/2) - - for idx, a in enumerate(self.atoms): - # print("adjacencies sent with", np.array(self.conmat[idx].astype(int)), np.array(self.mconmat[idx].astype(int))) - a.adjacencies(np.array(self.conmat[idx].astype(int)),np.array(self.mconmat[idx].astype(int)),type="Molecule") - - # def repr_CM(self, ): - # self.CM = coulomb_matrix(self.atnums, self.coord) - # NOTE: don't forget to copy to ligand object when ready - - -############### -### LIGAND #### -############### -class ligand(object): - def __init__(self, name: str, atlist: list, labels: list, coord: list, radii: list) -> None: - self.version = "V1.0" - self.refcode = "" - self.name = name # creates as ligand index, later modified - self.atlist = atlist # atom index list. Numbers refer to the original molecule from where the subroutine is launched - self.labels = labels # elements - self.coord = coord # coordinates - self.radii = radii - self.formula = labels2formula(labels) - self.occurrence = 0 # How many times the ligand appears in a molecule - - self.natoms = len(atlist) # number of atoms - self.type = "Ligand" - self.elemcountvec = getelementcount(labels) - self.Hvcountvec = getHvcount(labels) - - # Lists of potentially good variables for this molecule - self.poscharge = [] - self.posatcharge = [] - self.posobjlist = [] - self.posspin = [] - self.possmiles = [] - - # Stores information about the metal to which it is attached, about groups of connected atoms, and hapticity - self.metalatoms = [] - self.grouplist = [] # V14, this info is generated in get_hapticity - self.hapticity = False # V13 - self.hapttype = [] # V13 - # self.haptgroups = [] #V14, replaced by grouplist - - self.atnums = [] - self.atoms = [] - for idx, l in enumerate(self.labels): - newatom = atom(idx, l, self.coord[idx], self.radii[idx]) - self.atnums.append(newatom.atnum) - self.atoms.append(newatom) - - # Creates atoms and defines charge - self.eleccount = 0 - self.numH = 0 - for a in self.atoms: - if a.atnum == 1: - self.numH += 1 - self.eleccount += a.atnum - - # Stores the covalentradii factor and metal factor that were used to generate the molecule - def information(self, factor: float, metal_factor: float) -> None: - self.factor = factor - self.metal_factor = metal_factor - - def charge(self, atcharge: list, totcharge: int, rdkit_mol: object, smiles: str) -> None: - self.atcharge = atcharge - self.totcharge = totcharge - self.smiles_with_H = " " # Smiles of the ligand that includes any added H atom, created at "build_bonds" function - self.smiles = smiles # Now Empty, created later as a smiles without any added H atom - self.rdkit_mol = rdkit_mol # Rdkit mol object - - for idx, a in enumerate(self.atoms): - a.atom_charge(int(self.atcharge[idx])) - - # Spin State Variables - def magnetism(self, spin: int) -> None: - self.spin = spin - - def adjacencies(self, conmat: np.ndarray, mconnec: np.ndarray) -> None: - self.conmat = conmat - self.connec = np.zeros((self.natoms)) - for i in range(0, self.natoms): - self.connec[i] = np.sum(self.conmat[i, :]) - - # For ligand, mconmat is all zero np.ndarray --> not used, Can I remove? - # so metal_adjacency in Class atom is also empty because mconmat doesn't contain metal-ligand connectivity - self.mconmat = np.zeros((self.natoms, self.natoms)).astype(int) - - self.mconnec = mconnec - self.totconnec = int(np.sum(self.connec)) - self.totmconnec = int(np.sum(self.mconnec)) - self.adjtypes = get_adjacency_types(self.labels, self.conmat) # V14 - - for idx, a in enumerate(self.atoms): - a.adjacencies(np.array(self.conmat[idx].astype(int)), int(mconnec[idx]), type="Ligand") - -############### -class group(object): - def __init__(self, atlist: list, hapticity: bool, hapttype: list) -> None: - self.version = "V1.0" - self.atlist = atlist # atom index list. Numbers refer to the original molecule from where the subroutine is launched - self.hapticity = hapticity - self.hapttype = hapttype - - -############### -#### METAL #### -############### -class metal(object): - def __init__(self, name: int, atlist: int, label: str, coord: list, radii: float) -> None: - self.version = "V1.0" - self.refcode = "" - self.name = name # creates as metal index, later modified - self.atlist = atlist # atom index list. Numbers refer to the original molecule from where the subroutine is launched - self.label = label - self.coord = coord - self.radii = radii - self.natom = int(1) # number of atoms - self.type = "Metal" - self.poscharge = [] - self.coord_sphere = [] - self.coord_sphere_ID = [] - self.coordinating_atoms = [] - self.coordinating_atoms_sites = [] - self.occurrence = 0 # How many times the metal appears in a unit cell - self.coordinating_atlist = [] - self.group_list = [] - self.group_atoms_list = [] - - self.atom = atom(name, label, self.coord, self.radii) - - # Stores the covalentradii factor and metal factor that were used to generate the molecule - def information(self, factor: float, metal_factor: float) -> None: - self.factor = factor - self.metal_factor = metal_factor - - def charge(self, metal_charge: int) -> None: - self.totcharge = metal_charge - - # def magnetism(self, spin): - # self.spin = spin - - def adjacencies(self, mconnec: np.ndarray) -> None: - self.mconnec = mconnec # adjacencies matrix with only metal bonds - self.totmconnec = int(np.sum(mconnec)) - self.atom.adjacencies(np.array(int(0)), int(mconnec), type="Metal") - - def coordination (self, coordination_number: int, hapticity: bool, hapttype: list, posgeom_dev: dict) -> None: - self.hapticity = hapticity - self.hapttype = hapttype - self.coordination_number=coordination_number - self.posgeom_dev=posgeom_dev - if len(posgeom_dev) == 0: - self.geometry = "Undefined" - self.deviation = "Undefined" - else: - self.geometry=min(posgeom_dev, key=posgeom_dev.get) - self.deviation=min(posgeom_dev.values()) - - def relative_radius(self, rel: float, rel_g: float, rel_c: float) -> None: - self.rel = rel - self.rel_g = rel_g - self.rel_c = rel_c - - -############## -#### CELL #### -############## -class cell(object): - def __init__(self, refcode: str, labels: list, pos: list, cellvec: list, cellparam: list, warning_list: list) -> None: - - self.version = "V1.0" - self.refcode = refcode - - self.cellvec = cellvec - self.cellparam = cellparam - - self.labels = labels - self.atom_coord = pos # Atom cartesian coordinates from info file - - self.natoms = len(labels) - self.coord = [] - - self.speclist = [] - self.refmoleclist = [] - self.moleclist = [] - self.warning_list = warning_list - self.warning_after_reconstruction = [] - - self.charge_distribution_list = [] - - def arrange_cell_coord(self): - ## Updates the cell coordinates preserving the original atom ordering - ## Do do so, it uses the variable atlist stored in each molecule - self.coord = np.zeros((self.natoms,3)) - for mol in self.moleclist: - for z in zip(mol.atlist, mol.coord): - for i in range(0,3): - self.coord[z[0]][i] = z[1][i] - self.coord = np.ndarray.tolist(self.coord) - - - def data_for_postproc(self, molecules, indices, options): - self.pp_molecules = molecules - self.pp_indices = indices - self.pp_options = options - - def print_charge_assignment(self): - print("=====================================Charges for all species in the unit cell=====================================") - print("[Ref.code] : {}".format(self.refcode)) - for unit in self.moleclist: - if unit.type == "Complex": - print("\n{}, totcharge {}, spin multiplicity {}".format(unit.type, unit.totcharge, unit.spin)) - for metal in unit.metalist: - print("\t Metal: {} charge {}".format(metal.label, metal.totcharge)) - for ligand in unit.ligandlist: - print("\t Ligand charge {}, {}".format(ligand.totcharge, ligand.smiles)) - elif unit.type == "Other": - print("\n{} totcharge {}, {}".format(unit.type, unit.totcharge, unit.smiles)) - print("\n") - - def print_Warning(self): - reason_of_Warning = [ - "Warning 1! Errors received from getting reference molecules (disorder or strange coordination)", - "Warning 2! Missing H atoms from C in reference molecules", - "Warning 3! Missing H atoms from coordinated water in reference molecules", - "Warning 4! Steric clashes while blocking molecules", - "Warning 5! Errors in cell reconstruction", - "Warning 6! Empty list of possible charges received for molecule or ligand", - "Warning 7! More than one valid possible charge distribution found", - "Warning 8! No valid possible charge distribution found", - "Warning 9! Error while preparing molecules.", - ] - - # printing original list - print("The Warning type list is : " + str(self.warning_list)) - - res = [i for i, val in enumerate(self.warning_list) if val] - # printing result - # print ("The list indices having True values are : " + str(res)) - - if len(res) == 0: - print("No Warnings!") - else: - for i in res: - print(reason_of_Warning[i]) -########## END OF CLASSES ###########