diff --git a/cell2mol/charge_assignment.py b/cell2mol/charge_assignment.py index 7dd95a20..633963d6 100644 --- a/cell2mol/charge_assignment.py +++ b/cell2mol/charge_assignment.py @@ -45,18 +45,20 @@ def get_possible_charge_state(spec: object, debug: int=0): print(f"GET_POSSIBLE_CHARGE_STATE: {spec.formula} ({spec.subtype}) {spec.cov_factor=}") charge_states = [] ### Evaluates possible charges for each protonation state ### + print(f"GET_POSSIBLE_CHARGE_STATE: {spec.formula} ({spec.subtype}) ({len(spec.protonation_states)}) {spec.protonation_states=}") for prot in spec.protonation_states: - # charge_states = [] + charge_states_for_one_prot = [] final_charges = get_list_of_charges_to_try(prot) if debug >= 2: print(f" POSCHARGE will try charges {final_charges}") for ich in final_charges: ch_state = get_charge(ich, prot) ## Protonation is passed to the ch_state object (ch_state.protonation) - charge_states.append(ch_state) + charge_states_for_one_prot.append(ch_state) if ch_state is not None: if debug >= 2: print(f" POSCHARGE: charge {ich} with smiles {ch_state.smiles}") else : if debug >= 2: print(f" POSCHARGE: charge {ich} failed {ch_state}") + charge_states.extend(charge_states_for_one_prot) if debug >= 2: print(f"POSCHARGE: {len(charge_states)=}") ### After collecting charge states, then best ones are selected if spec.subtype == "ligand": @@ -222,6 +224,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: # Boolean that decides whether a non-local approach is needed non_local_groups = 0 + non_local_groups_indices = [] needs_nonlocal = False # Initialization of the variables @@ -387,6 +390,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: if a.connec == 1: needs_nonlocal = True non_local_groups += 1 + non_local_groups_indices.append(idx) if debug >= 2: print(f" GET_PROTONATION_STATES: will be sent to nonlocal due to {a.label} atom") elif a.connec > 1: block[idx] = 1 @@ -401,6 +405,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: elif a.connec == 2: needs_nonlocal = True non_local_groups += 1 + non_local_groups_indices.append(idx) if debug >= 2: print(f" GET_PROTONATION_STATES: will be sent to nonlocal due to {a.label} atom") # block[idx] = 1 # elemlist[idx] = "H" @@ -435,7 +440,8 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: if a.connec >= 3: block[idx] = 1 # needs_nonlocal = True - # non_local_groups += 1 + # non_local_groups += 1 + # non_local_groups_indices.append(idx) # elemlist[idx] = "H" # addedlist[idx] = 1 else: @@ -451,6 +457,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: else: needs_nonlocal = True non_local_groups += 1 + non_local_groups_indices.append(idx) if debug >= 2: print(f" GET_PROTONATION_STATES: will be sent to nonlocal due to {a.label} atom") # Phosphorous elif (a.connec >= 3) and a.label == "P": @@ -480,6 +487,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: else: needs_nonlocal = True non_local_groups += 1 + non_local_groups_indices.append(idx) if debug >= 2: print(f" GET_PROTONATION_STATES: will be sent to nonlocal due to {a.label} atom") # Silicon elif a.label == "Si": @@ -498,6 +506,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: if not needs_nonlocal: needs_nonlocal = True non_local_groups += 1 + non_local_groups_indices.append(idx) if debug >= 2: print(f" GET_PROTONATION_STATES: will be sent to nonlocal due to {a.label} atom with no rules") # If, at this stage, we have found that any atom must be added, this is done before entering the non_local part. @@ -544,12 +553,13 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: if debug >= 2: print(f" GET_PROTONATION_STATES: local_labels: {local_labels}") if debug >= 2: print(f" GET_PROTONATION_STATES: block: {block}") if debug >= 2: print(f" GET_PROTONATION_STATES: addedlist: {addedlist}") - if debug >= 2: print(f" GET_PROTONATION_STATES: {non_local_groups} non_local_groups groups found") - if debug >= 2: print(f" GET_PROTONATION_STATES: {non_local_groups=}") + if debug >= 2: print(f" GET_PROTONATION_STATES: {len(non_local_groups_indices)} non_local_groups groups found") + if debug >= 2: print(f" GET_PROTONATION_STATES: non_local_groups={[ligand.labels[idx] for idx in non_local_groups_indices]}") + if debug >= 2: print(f" GET_PROTONATION_STATES: {non_local_groups_indices=}") # CREATES ALL COMBINATIONS OF PROTONATION STATES# # Creates [0,1] tuples for each non_local protonation site tmp = [] - for kdx in range(0,non_local_groups): + for kdx in range(0, len(non_local_groups_indices)): tmp.append([0,1]) if len(tmp) > 1: @@ -574,13 +584,13 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: o_s = np.sum(com) toallocate = int(0) - print(f"{non_local_groups=}") + print(f"{non_local_groups=} {non_local_groups_indices=}") for jdx, a in enumerate(ligand.atoms): if a.mconnec >= 1 and a.label not in avoid and block[jdx] == 0: print(jdx, a.label, a.mconnec) print("====") for jdx, a in enumerate(ligand.atoms): - if a.mconnec >= 1 and a.label not in avoid and block[jdx] == 0: + if a.mconnec >= 1 and a.label not in avoid and block[jdx] == 0 and jdx in non_local_groups_indices: print(a.label) if non_local_groups > 1: print(f"{com=} {toallocate=}") @@ -904,11 +914,29 @@ def get_metal_poscharges(metal: object, debug: int=0) -> list: # Coordination Geometry Table of the D-Block Elements and Their Ions. # J. Chem. Educ. 1997, 74, 915. + # metalloids = ["B", "Si", "Ge", "As", "Sb", "Te", "Po"] mol = metal.get_parent("molecule") if not hasattr(mol,"is_haptic"): mol.get_hapticity() atnum = elemdatabase.elementnr[metal.label] at_charge = defaultdict(list) + + # Alkali Metals + at_charge[3] = [1] # Li + at_charge[11] = [1] # Na + at_charge[13] = [1] # K + at_charge[31] = [1] # Rb + at_charge[49] = [1] # Cs + at_charge[81] = [1] # Fr + + # Alkaline Earth Metals + at_charge[4] = [2] # Be + at_charge[12] = [2] # Mg + at_charge[20] = [2] # Ca + at_charge[38] = [2] # Sr + at_charge[56] = [2] # Ba + at_charge[88] = [2] # Ra + # 1st-row transition metals. at_charge[21] = [3] # Sc at_charge[22] = [2, 3, 4] # Ti @@ -943,6 +971,16 @@ def get_metal_poscharges(metal: object, debug: int=0) -> list: at_charge[79] = [1, 3] # Au at_charge[80] = [2] # Hg + # post-transition metals + at_charge[13] = [0, 3] # Al + at_charge[31] = [0, 3] # Ga + at_charge[32] = [0, 2, 4] # Ge + at_charge[49] = [0, 3] # In + at_charge[50] = [0, 2, 4] # Sn + at_charge[81] = [0, 1, 3] # Tl + at_charge[82] = [0, 2, 4] # Pb + at_charge[83] = [0, 3] # Bi + poscharges = at_charge[atnum] list_of_zero_OS = ["Fe", "Ni", "Ru"] diff --git a/cell2mol/classes.py b/cell2mol/classes.py index ea79d601..1a1cef1c 100644 --- a/cell2mol/classes.py +++ b/cell2mol/classes.py @@ -1,7 +1,7 @@ import numpy as np import os -from cell2mol.connectivity import get_adjacency_types, get_element_count, labels2electrons, labels2formula, get_adjmatrix -from cell2mol.connectivity import get_metal_idxs, split_species, get_radii, split_group +from cell2mol.connectivity import get_adjacency_types, get_element_count, labels2electrons, labels2formula, get_adjmatrix, is_haptic_ring +from cell2mol.connectivity import get_metal_idxs, get_non_transition_metal_idxs, split_species, get_radii, split_group from cell2mol.connectivity import compare_atoms, compare_species, compare_metals, compare_reference_indices from cell2mol.cell_reconstruction import classify_fragments, fragments_reconstruct from cell2mol.cell_operations import cart2frac, frac2cart_fromparam @@ -196,6 +196,9 @@ def set_atoms(self, atomlist=None, create_adjacencies: bool=False, debug: int=0) if debug > 0: print(f"SPECIE.SET_ATOMS: creating atom for label {l}") ## For each l in labels, create an atom class object. ismetal = elemdatabase.elementblock[l] == "d" or elemdatabase.elementblock[l] == "f" + # non transition metals + if len(get_non_transition_metal_idxs([l])) > 0: ismetal = True + if ismetal : print(f"SPECIE.SET_ATOMS: {l}") if debug > 0: print(f"SPECIE.SET_ATOMS: {ismetal=}") if self.frac_coord is not None: if ismetal: newatom = metal(l, self.coord[idx], self.frac_coord[idx], radii=self.radii[idx]) @@ -393,6 +396,12 @@ def split_complex(self, debug: int=0): self.metals = [] # Identify Metals and the rest metal_idx = list([self.indices[idx] for idx in get_metal_idxs(self.labels, debug=debug)]) + non_transition_metals_idx = list([self.indices[idx] for idx in get_non_transition_metal_idxs(self.labels, debug=debug)]) + if len(non_transition_metals_idx) > 0: + print(f"MOLECULE.SPLIT COMPLEX: Found non-transition metals in the molecule {self.formula}") + print(f"MOLECULE.SPLIT COMPLEX: Non-transition metals found: {[self.labels[idx] for idx in non_transition_metals_idx]}") + metal_idx.extend(non_transition_metals_idx) + rest_idx = list(idx for idx in self.indices if idx not in metal_idx) if debug > 0 : print(f"MOLECULE.SPLIT COMPLEX: labels={self.labels}") if debug > 0 : print(f"MOLECULE.SPLIT COMPLEX: metal_idx={metal_idx}") @@ -759,6 +768,7 @@ def get_hapticity(self, debug: int=0): numO = self.labels.count("O") # For h4-Enone numN = self.labels.count("N") + ## Carbon-based Haptic Ligands if numC == 2: self.haptic_type = ["h2-Benzene", "h2-Butadiene", "h2-ethylene"]; self.is_haptic = True elif numC == 3 and numO == 0: self.haptic_type = ["h3-Allyl", "h3-Cp"]; self.is_haptic = True @@ -771,7 +781,11 @@ def get_hapticity(self, debug: int=0): # Other less common types of haptic ligands elif numC == 0 and numAs == 5: self.haptic_type = ["h5-AsCp"]; self.is_haptic = True elif numC == 0 and numP == 5: self.haptic_type = ["h5-Pentaphosphole"]; self.is_haptic = True - elif numC == 1 and numP == 1: self.haptic_type = ["h2-P=C"]; self.is_haptic = True + elif numC == 1 and numP == 1: self.haptic_type = ["h2-P=C"]; + elif is_haptic_ring(self.labels, self.coord): + self.haptic_type = [f"{len(self.labels)}-ring {self.formula}"] + self.is_haptic = True + return self.haptic_type ####################################################### @@ -1359,6 +1373,9 @@ def check_missing_H(self, debug: int=0): from cell2mol.missingH import check_missingH Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater, Missing_H_in_Water = check_missingH(self.refmoleclist, debug=debug) print(f"CELL.Check_missing_H: {Missing_H_in_C=} {Missing_H_in_CoordWater=} {Missing_H_in_Water=}") + self.missing_H_in_Carbon = Missing_H_in_C + self.missing_H_in_CoordWater = Missing_H_in_CoordWater + self.missing_H_in_Water = Missing_H_in_Water if ismissingH or Missing_H_in_C or Missing_H_in_CoordWater or Missing_H_in_Water : self.has_missing_H = True else: @@ -1675,38 +1692,49 @@ def assign_charges (self, debug: int=0): final_charge_distribution, final_charges = balance_charge(self.unique_indices, self.unique_species, debug=debug) print("final_charge_distribution", final_charge_distribution) print("final_charges", final_charges) - if len(final_charge_distribution) > 1: - if debug >= 1: print("More than one Possible Distribution Found:", final_charge_distribution) - second_final_charge_distribution, second_final_charges = balance_charge(self.unique_indices, self.unique_species, predict=True, debug=debug) - print("second_final_charge_distribution", second_final_charge_distribution) - print("second_final_charges", second_final_charges) + # if len(final_charge_distribution) > 1: + # if debug >= 1: print("More than one Possible Distribution Found:", final_charge_distribution) + # second_final_charge_distribution, second_final_charges = balance_charge(self.unique_indices, self.unique_species, predict=True, debug=debug) + # print("second_final_charge_distribution", second_final_charge_distribution) + # print("second_final_charges", second_final_charges) - if len(second_final_charge_distribution) == 1: - self.error_multiple_distrib = False - self.error_empty_distrib = False - final_charge_distribution = second_final_charge_distribution - final_charges = second_final_charges - else: - self.error_multiple_distrib = True - self.error_empty_distrib = False - return # Stopping. + # if len(second_final_charge_distribution) == 1: + # self.error_multiple_distrib = False + # self.error_empty_distrib = False + # final_charge_distribution = second_final_charge_distribution + # final_charges = second_final_charges + # else: + # self.error_multiple_distrib = True + # self.error_empty_distrib = False + # return # Stopping. - elif len(final_charge_distribution) == 0: # - if debug >= 1: print("No valid Distribution Found", final_charge_distribution) - second_final_charge_distribution, second_final_charges = balance_charge(self.unique_indices, self.unique_species, rare=True, debug=debug) - print("second_final_charge_distribution", second_final_charge_distribution) - print("second_final_charges", second_final_charges) + # elif len(final_charge_distribution) == 0: # + # if debug >= 1: print("No valid Distribution Found", final_charge_distribution) + # second_final_charge_distribution, second_final_charges = balance_charge(self.unique_indices, self.unique_species, rare=True, debug=debug) + # print("second_final_charge_distribution", second_final_charge_distribution) + # print("second_final_charges", second_final_charges) - if len(second_final_charge_distribution) == 1: - self.error_multiple_distrib = False - self.error_empty_distrib = False - final_charge_distribution = second_final_charge_distribution - final_charges = second_final_charges + # if len(second_final_charge_distribution) == 1: + # self.error_multiple_distrib = False + # self.error_empty_distrib = False + # final_charge_distribution = second_final_charge_distribution + # final_charges = second_final_charges - else: - self.error_multiple_distrib = False - self.error_empty_distrib = True - return # Stopping. + # else: + # self.error_multiple_distrib = False + # self.error_empty_distrib = True + # return # Stopping. + if len(final_charge_distribution) > 1: + if debug >= 1: print("More than one Possible Distribution Found:", final_charge_distribution) + self.error_multiple_distrib = True + self.error_empty_distrib = False + return # Stopping. + + elif len(final_charge_distribution) == 0: + if debug >= 1: print("No valid Distribution Found", final_charge_distribution) + self.error_multiple_distrib = False + self.error_empty_distrib = True + return # Stopping. else: # Only one possible charge distribution -> getcharge for the repeated species self.error_multiple_distrib = False @@ -2002,14 +2030,20 @@ def assess_errors(self, mode): print("Errors in hydrogens") print("-------------------------------") if self.has_isolated_H: case = 1 - elif self.has_missing_H: case = 2 + # elif self.has_missing_H: case = 2 + elif self.missing_H_in_Water: case = 2 + elif self.missing_H_in_CoordWater: case = 3 + elif self.missing_H_in_Carbon: case = 4 else : case = 0 elif mode == "possible_charges": print("-------------------------------") print("Errors in possible charges") print("-------------------------------") if self.has_isolated_H: case = 1 - elif self.has_missing_H: case = 2 + # elif self.has_missing_H: case = 2 + elif self.missing_H_in_Water: case = 2 + elif self.missing_H_in_CoordWater: case = 3 + elif self.missing_H_in_Carbon: case = 4 elif self.error_get_poscharges: case = 5 else : case = 0 elif mode == "reconstruction": @@ -2034,6 +2068,8 @@ def assess_errors(self, mode): elif self.error_empty_distrib : case = 7 elif self.error_create_bonds : case = 8 else : case = 0 + # handle_error(case) + # print("") # elif mode == "neutrality": # print("-------------------------------") # print("Errors in Unit Cell") @@ -2052,9 +2088,20 @@ def assess_errors(self, mode): # if not self.is_neutral: case = 9 # # No errors # else : case = 0 - - handle_error(case) - print("") + if mode == "hydrogens" or mode == "possible_charges": + if case == 2 or case == 3 or case == 4 : + handle_error(2) + if case == 2: + print(" - Missing Hydrogens in Water Molecules") + elif case == 3: + print(" - Missing Hydrogens in Coordinated Water Molecules") + elif case == 4: + print(" - Missing Hydrogens in Carbon Atoms") + else : + handle_error(case) + else: + handle_error(case) + print("") self.error_case = case ####################################################### diff --git a/cell2mol/connectivity.py b/cell2mol/connectivity.py index d2a977c3..96bd4ff3 100644 --- a/cell2mol/connectivity.py +++ b/cell2mol/connectivity.py @@ -8,6 +8,8 @@ from cell2mol.elementdata import ElementData from cell2mol.read_write import writexyz import os +import networkx as nx + elemdatabase = ElementData() ####################################################### @@ -116,6 +118,29 @@ def find_closest_metal(atom: object, metalist: list, debug: int=0): return np.argmin(dist) ################################ +def is_haptic_ring(labels, coord): + """ Check if the group is a ring """ + isgood, adjmat, adjnum = get_adjmatrix(labels, coord) + + # Convert adjacency matrix to a NetworkX graph + G = nx.from_numpy_array(np.array(adjmat)) + + # Check if the graph is connected + if not nx.is_connected(G): + return False # If not connected, can't form a single ring + + # Check for cycles and ensure the graph forms a simple cycle + cycle_basis = nx.cycle_basis(G) + + # Check if there's exactly one cycle that includes all nodes (simple ring) + if len(cycle_basis) == 1 and len(cycle_basis[0]) == len(G.nodes): + print("Ring group", len(labels), labels) + return True # The graph represents a ring compound + + return False # Otherwise, not a ring compound + +################################ + def labels2formula(labels: list): elems = elemdatabase.elementnr.keys() formula=[] @@ -155,15 +180,18 @@ def get_metal_idxs(labels: list, debug: int=0): return metal_indices ################################ -def get_alkali_alkaline_earth_metal_idxs(labels: list, debug: int=0): +def get_non_transition_metal_idxs(labels: list, debug: int=0): """ alkali metals (Group 1) and alkaline earth metals (Group 2) """ - alkali_alkaline_earth_metal_indices = [] + non_transition_metal_indices = [] for idx, l in enumerate(labels): - if elemdatabase.elementgroup[l]==1 and l != "H" and l != "D": - alkali_alkaline_earth_metal_indices.append(idx) + if elemdatabase.elementgroup[l]==1 and l != "H" and l != "D": # Alkali Metals + non_transition_metal_indices.append(idx) elif elemdatabase.elementgroup[l]==2 : - alkali_alkaline_earth_metal_indices.append(idx) - return alkali_alkaline_earth_metal_indices + non_transition_metal_indices.append(idx) # Alkaline Earth Metals + elif l in ["Al", "Ga", "Ge", "In", "Sn", "Tl", "Pb", "Bi", "Po", "At"]: + non_transition_metal_indices.append(idx) + return non_transition_metal_indices + ################################ def get_metal_species(labels: list): @@ -266,22 +294,25 @@ def get_adjmatrix(labels: list, pos: list, cov_factor: float=1.3, radii="default or elemdatabase.elementblock[labels[j]] == "f"): adjmat[i, j] = 1 adjmat[j, i] = 1 - + elif len(get_non_transition_metal_idxs([labels[i], labels[j]])) > 0: + adjmat[i, j] = 1 + adjmat[j, i] = 1 + # Set Adjacency Matrix as zeros for alkali and alkaline earth metals - for i in range(0, natoms - 1): - for j in range(i, natoms): - if i != j: - if (elemdatabase.elementgroup[labels[i]] == 2 or elemdatabase.elementgroup[labels[j]] == 2) : - if adjmat[i, j] == 1 or adjmat[j, i] == 1: - print("Adjacency Matrix: Alkali or Alkaline Earth Metal", labels[i], labels[j], f"{i=}", f"{j=}", f"{adjmat[i, j]=}") - adjmat[i, j] = 0 - adjmat[j, i] = 0 - elif (labels[i] != "H" and labels[j] != "H") and (labels[i] != "D" and labels[j] != "D"): - if (elemdatabase.elementgroup[labels[i]] == 1 or elemdatabase.elementgroup[labels[j]] == 1 ): - if adjmat[i, j] == 1 or adjmat[j, i] == 1: - print("Adjacency Matrix: Alkali or Alkaline Earth Metal", labels[i], labels[j], f"{i=}", f"{j=}", f"{adjmat[i, j]=}") - adjmat[i, j] = 0 - adjmat[j, i] = 0 + # for i in range(0, natoms - 1): + # for j in range(i, natoms): + # if i != j: + # if (elemdatabase.elementgroup[labels[i]] == 2 or elemdatabase.elementgroup[labels[j]] == 2) : + # if adjmat[i, j] == 1 or adjmat[j, i] == 1: + # print("Adjacency Matrix: Alkali or Alkaline Earth Metal", labels[i], labels[j], f"{i=}", f"{j=}", f"{adjmat[i, j]=}") + # adjmat[i, j] = 0 + # adjmat[j, i] = 0 + # elif (labels[i] != "H" and labels[j] != "H") and (labels[i] != "D" and labels[j] != "D"): + # if (elemdatabase.elementgroup[labels[i]] == 1 or elemdatabase.elementgroup[labels[j]] == 1 ): + # if adjmat[i, j] == 1 or adjmat[j, i] == 1: + # print("Adjacency Matrix: Alkali or Alkaline Earth Metal", labels[i], labels[j], f"{i=}", f"{j=}", f"{adjmat[i, j]=}") + # adjmat[i, j] = 0 + # adjmat[j, i] = 0 # Sums the adjacencies of each atom to obtain "adjnum" for i in range(0, natoms): diff --git a/cell2mol/coordination_sphere.py b/cell2mol/coordination_sphere.py index 2c743f4d..1acfb67f 100644 --- a/cell2mol/coordination_sphere.py +++ b/cell2mol/coordination_sphere.py @@ -1,7 +1,7 @@ import numpy as np from cosymlib import Geometry from cell2mol.other import * -from cell2mol.connectivity import add_atom +from cell2mol.connectivity import add_atom, get_adjmatrix from cell2mol.elementdata import ElementData elemdatabase = ElementData() @@ -397,29 +397,36 @@ def coordination_correction_for_nonhaptic(group: object, debug: int=0): isremoved = False ## Now there is an extra loop for each metal of the group. For bridging ligands - for met in group.metals: + for jdx, met in enumerate(group.metals): if isremoved: continue lig = group.get_parent("ligand") ligand_idx = atom.get_parent_index("ligand") if debug > 0: print(f"\tevaluating coordination with metal {met.label}") if debug > 2: print(f"\n{met}") - isadded, newlab, newcoord = add_atom(lig.labels, lig.coord, ligand_idx, lig, list([met]), "H", removed_idx, debug=debug) - if debug >= 2:print(f"{removed_idx=}") - if isadded: - if debug > 0: print(f"\tConnectivity verified for atom {atom.label} with ligand index {ligand_idx}") - conn_idx.append(idx) - final_ligand_indices.append(atom.get_parent_index("ligand")) - good_atoms.append(atom) + tmplabels = [atom.label, met.label] + tmpcoord = [atom.coord, met.coord] + isconnected, tmpadjmat, tmpadjnum = get_adjmatrix(tmplabels, tmpcoord, metal_only=True) + if isconnected and any(tmpadjnum) > 0: + if debug > 0 : print(f"\tAtom {atom.label} is connected to metal {met.label} (atom {ligand_idx=}) (metal group.metals index {jdx=})") + isadded, newlab, newcoord = add_atom(lig.labels, lig.coord, ligand_idx, lig, list([met]), "H", removed_idx, debug=debug) + if debug >= 2:print(f"{removed_idx=}") + if isadded: + if debug > 0: print(f"\tConnectivity verified for atom {atom.label} with ligand index {ligand_idx}") + conn_idx.append(idx) + final_ligand_indices.append(atom.get_parent_index("ligand")) + good_atoms.append(atom) + else: + if debug > 0: print(f"\tCORRECT mconnec of atom {atom.label} with ligand index {ligand_idx}") + isremoved = True + removed_idx.append(ligand_idx) + ### Reset Connectivity of the atom and the parents + atom.reset_mconnec(met, debug=debug) + met.get_coord_sphere() + met.get_coord_sphere_formula() + # Group will be redifined using split_group, so we don't need to remove the atom from group + # group.remove_atom(idx, debug=debug) else: - if debug > 0: print(f"\tCORRECT mconnec of atom {atom.label} with ligand index {ligand_idx}") - isremoved = True - removed_idx.append(ligand_idx) - ### Reset Connectivity of the atom and the parents - atom.reset_mconnec(met, debug=debug) - met.get_coord_sphere() - met.get_coord_sphere_formula() - # Group will be redifined using split_group, so we don't need to remove the atom from group - # group.remove_atom(idx, debug=debug) + if debug > 0 : print(f"\tAtom {atom.label} is not connected to metal {met.label} (atom {ligand_idx=}) (metal group.metals index {jdx=})") conn_idx = sorted(list(set(conn_idx))) @@ -450,10 +457,10 @@ def coordination_correction_for_haptic (group: object, debug: int=0): if debug >=1 : print(f"\t!!! Wrong metal-coordination assignment for Atom", idx, atom.label , get_dist(atom.coord, metal.coord), "due to H") if debug >=1 : print(atom.label) atom.reset_mconnec(metal, debug=debug) - elif std_dev > thres_std and ratio > thres_ratio : - if debug >=1 : print(f"\t!!! Wrong metal-coordination assignment for Atom", idx, atom.label , get_dist(atom.coord, metal.coord), "due to the long distance") - if debug >=1 : print(atom.label) - atom.reset_mconnec(metal, debug=debug) + # elif std_dev > thres_std and ratio > thres_ratio : + # if debug >=1 : print(f"\t!!! Wrong metal-coordination assignment for Atom", idx, atom.label , get_dist(atom.coord, metal.coord), "due to the long distance") + # if debug >=1 : print(atom.label) + # atom.reset_mconnec(metal, debug=debug) else : conn_idx.append(idx) final_ligand_indices.append(atom.get_parent_index("ligand")) diff --git a/cell2mol/final_c2m_driver.py b/cell2mol/final_c2m_driver.py index dfbbb984..9ecae8f1 100644 --- a/cell2mol/final_c2m_driver.py +++ b/cell2mol/final_c2m_driver.py @@ -35,7 +35,10 @@ def handle_cif_file(input_path, system_type, name, current_dir, debug_mode): exit_with_error_exception(e) elif system_type == "unitcell": print("Processing unit cell from .cif file") - process_unitcell(input_path, name, current_dir, debug_mode) + try: + process_unitcell(input_path, name, current_dir, debug_mode) + except Exception as e: + exit_with_error_exception(e) else: exit_with_error_input("Invalid system type for .cif file", {"system_type": system_type}) diff --git a/cell2mol/missingH.py b/cell2mol/missingH.py index b01ee0f3..e4ab17d7 100755 --- a/cell2mol/missingH.py +++ b/cell2mol/missingH.py @@ -56,7 +56,7 @@ def get_missingH_from_adjacency(Z, center, points, bonded_atom_labels): if bonded_atom_labels[0] == "O" or bonded_atom_labels[0] == "N": # CO or CN missingH = False else: - shapeval = "more than 1 (possibly 3 in missing H in methyl)" + shapeval = "more than 1 (possibly missing H in methyl)" missingH = True elif val_e == shapeval : missingH = False diff --git a/cell2mol/read_write.py b/cell2mol/read_write.py index 7f370833..9a4eeb30 100755 --- a/cell2mol/read_write.py +++ b/cell2mol/read_write.py @@ -495,3 +495,75 @@ def print_output(moleclist): print(f"{idx}: {mol.subtype}({mol.type}) {mol.formula}") #{mol.totcharge=} {mol.spin=}\n {mol.smiles}") print("") +###################################################### +def print_refmoleclist (cell): + for i, ref in enumerate(cell.refmoleclist): + if hasattr(ref, "totcharge"): + if ref.iscomplex: + print(f"Reference Molecule {i}: {ref.formula} {ref.totcharge=} (Complex)\n{ref}") + else: + print(f"Reference Molecule {i} : {ref.formula} {ref.smiles=} {ref.totcharge=} (Non-complex)") + else: + if ref.iscomplex: + print(f"Reference Molecule {i}: {ref.formula} (Complex)") + else: + print(f"Reference Molecule {i} : {ref.formula} (Non-complex)") + + if ref.iscomplex: + for met in ref.metals: + if hasattr(met, "charge"): + print(f"\t{met.formula} {met.coord_sphere_formula=} {met.coord_geometry=} {met.geom_deviation=} {met.coord_nr=} {met.charge=}") + else: + print(f"\t{met.formula} {met.coord_sphere_formula=}{met.coord_geometry=} {met.geom_deviation=} {met.coord_nr=}") + for lig in ref.ligands: + if hasattr(lig, "totcharge"): + print(f"\t{lig.formula} {lig.smiles=} {lig.is_haptic=} {lig.haptic_type=} {lig.denticity=} {lig.totcharge=}") + else: + print(f"\t{lig.formula} {lig.is_haptic=} {lig.haptic_type=} {lig.denticity=}") + for group in lig.groups: + print(f"\t|--(group){group.labels} {group.is_haptic=} {group.haptic_type=} {group.denticity=} {group.closest_metal.label=}") + # for met in group.metals: + # print(f"\t|--(group.metals){met.label} {met.mconnec=}") +###################################################### +def print_unique_species (cell): + print(f"Unique Species in {cell.subtype}:") + for specie in cell.unique_species: + if specie.subtype == "metal": + if hasattr(specie, "charge"): + print(f"\t{specie.unique_index=} {specie.formula} ({specie.subtype}) {specie.coord_sphere_formula=} {specie.charge=}") + else: + print(f"\t{specie.unique_index=} {specie.formula} ({specie.subtype}) {specie.coord_sphere_formula=}") + else: + if hasattr(specie, "totcharge"): + print(f"\t{specie.unique_index=} {specie.formula} ({specie.subtype}) {specie.smiles=} {specie.totcharge=}") + else: + print(f"\t{specie.unique_index=} {specie.formula} ({specie.subtype})") +###################################################### +def print_moleclist (cell): + for i, mol in enumerate(cell.moleclist): + if hasattr(mol, "totcharge"): + if mol.iscomplex: + print(f"Unitcell Molecule {i}: {mol.formula} {mol.totcharge=} (Complex)\n{mol}") + else: + print(f"Unitcell Molecule {i} : {mol.formula} {mol.smiles=} {mol.totcharge=} (Non-complex)") + else: + if mol.iscomplex: + print(f"Unitcell Molecule {i}: {mol.formula} (Complex)") + else: + print(f"Unitcell Molecule {i} : {mol.formula} (Non-complex)") + + if mol.iscomplex: + for met in mol.metals: + if hasattr(met, "charge"): + print(f"\t{met.formula} {met.coord_sphere_formula=} {met.coord_geometry=} {met.geom_deviation=} {met.coord_nr=} {met.charge=}") + else: + print(f"\t{met.formula} {met.coord_sphere_formula=} {met.coord_geometry=} {met.geom_deviation=} {met.coord_nr=}") + for lig in mol.ligands: + if hasattr(lig, "totcharge"): + print(f"\t{lig.formula} {lig.smiles=} {lig.is_haptic=} {lig.haptic_type=} {lig.denticity=} {lig.totcharge=}") + else: + print(f"\t{lig.formula} {lig.is_haptic=} {lig.haptic_type=} {lig.denticity=}") + for group in lig.groups: + print(f"\t|--(group){group.labels} {group.is_haptic=} {group.haptic_type=} {group.denticity=} {group.closest_metal.label=}") + # for met in group.metals: + # print(f"\t|--(group.metals){met.label} {met.mconnec=}") \ No newline at end of file diff --git a/cell2mol/refcell.py b/cell2mol/refcell.py index fc2d1a5d..03b31c48 100644 --- a/cell2mol/refcell.py +++ b/cell2mol/refcell.py @@ -3,7 +3,7 @@ from ase.io import read from contextlib import redirect_stdout from cell2mol.classes import cell -from cell2mol.read_write import get_wyckoff_positions +from cell2mol.read_write import get_wyckoff_positions, print_refmoleclist, print_unique_species from cell2mol.cell_operations import frac2cart_fromparam from cell2mol.new_cell_reconstruction import modify_cov_factor_due_to_H, modify_cov_factor_due_to_possible_charges from cell2mol.other import handle_error @@ -40,6 +40,9 @@ def process_refcell(input_path, name, current_dir, debug=0): get_unique_species_in_reference(refcell, debug) else: print(f"Error occurred in processing reference cell: error case {refcell.error_case}") + print_refmoleclist(refcell) + if hasattr(refcell, "unique_species"): + print_unique_species(refcell) refcell.save(ref_cell_fname) # if os.path.exists(ref_cell_fname): # with open(output_fname, "a") as output: @@ -79,7 +82,16 @@ def process_refcell(input_path, name, current_dir, debug=0): error_fname = os.path.join(current_dir, f"reference_error_{refcell.error_case}.out") with open(error_fname, "w") as error_output: with redirect_stdout(error_output): - handle_error(refcell.error_case) + if refcell.error_case == 2 or refcell.error_case == 3 or refcell.error_case == 4 : + handle_error(2) + if refcell.error_case == 2: + print(" - Missing Hydrogens in Water Molecules") + elif refcell.error_case == 3: + print(" - Missing Hydrogens in Coordinated Water Molecules") + elif refcell.error_case == 4: + print(" - Missing Hydrogens in Carbon Atoms") + else : + handle_error(refcell.error_case) return refcell def create_reference (input_path, name, cell_vector, cell_param, debug): @@ -109,6 +121,20 @@ def get_unique_species_in_reference (refcell, debug): #refcell = modify_cov_factor_due_to_possible_charges(refcell, debug=debug) refcell.get_selected_cs(debug=debug) refcell.assess_errors(mode="possible_charges") + + print("Results of possible charges") + for specie in refcell.species_list: + # for specie in refcell.unique_species: + if hasattr(specie, "possible_cs"): + if specie.subtype == "metal": + print(f"{specie.unique_index=} {specie.formula} ({specie.subtype}) {specie.coord_sphere_formula} {specie.possible_cs=}") + else: + print(f"{specie.unique_index=} {specie.formula} ({specie.subtype}) {specie.possible_cs=}") + else: + if specie.subtype == "metal": + print(f"{specie.unique_index=} {specie.formula} ({specie.subtype}) {specie.coord_sphere_formula} No possible cs") + else: + print(f"{specie.unique_index=} {specie.formula}, {specie.subtype} No possible cs") #[p.subtype for p in specie.parents]) tend = time.time() if debug >= 1: print(f"\nAssign possible charges of Reference molecules. Total execution time: {tend - tini:.2f} seconds") diff --git a/cell2mol/test/check_Cell_object.ipynb b/cell2mol/test/check_Cell_object.ipynb index a05d603f..06174521 100644 --- a/cell2mol/test/check_Cell_object.ipynb +++ b/cell2mol/test/check_Cell_object.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -11,7 +11,7 @@ "'2023.09.6'" ] }, - "execution_count": 6, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -44,6 +44,331 @@ "elemdatabase = ElementData()" ] }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "refcode = \"CESMUK\"\n", + "refcell = np.load(f\"{refcode}/Ref_Cell_{refcode}.cell\", allow_pickle=True)\n", + "unitcell = np.load(f\"{refcode}/Cell_{refcode}.cell\", allow_pickle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "28\n", + "H5-C5 ligand 0 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "Fe metal 2 No possible cs ['molecule', 'unitcell', 'reference']\n", + "Mg molecule 5 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H8-C4-O molecule 3 No possible cs ['unitcell', 'reference', 'molecule']\n", + "Mg molecule 5 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H5-C5 ligand 0 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "Fe metal 2 No possible cs ['molecule', 'unitcell', 'reference']\n", + "Mg molecule 5 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H8-C4-O molecule 3 No possible cs ['unitcell', 'reference', 'molecule']\n", + "Mg molecule 5 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H8-C4-O molecule 3 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H41-C29-N2 molecule 4 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H5-C5 ligand 0 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "Fe metal 2 No possible cs ['molecule', 'unitcell', 'reference']\n", + "H41-C29-N2 molecule 4 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H8-C4-O molecule 3 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H41-C29-N2 molecule 4 No possible cs ['unitcell', 'reference', 'molecule']\n", + "H5-C5 ligand 0 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "C-O ligand 1 No possible cs ['molecule', 'unitcell', 'reference']\n", + "Fe metal 2 No possible cs ['molecule', 'unitcell', 'reference']\n", + "H41-C29-N2 molecule 4 No possible cs ['unitcell', 'reference', 'molecule']\n" + ] + } + ], + "source": [ + "cell = refcell\n", + "\n", + "print(len(cell.species_list))\n", + "for specie in cell.species_list:\n", + "# for specie in refcell.unique_species:\n", + "\n", + " if hasattr(specie, \"possible_cs\"):\n", + " print(specie.formula, specie.subtype, specie.unique_index, specie.possible_cs, [p.subtype for p in specie.parents])\n", + " else:\n", + " print(specie.formula, specie.subtype, specie.unique_index, \"No possible cs\", [p.subtype for p in specie.parents])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 15\n", + " Formula = H5-C7-O2-Fe\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = ['[H]C1=C([H])[C-]([H])C([H])=C1[H]', '[C-]#[O+]', '[C-]#[O+]']\n", + " Origin = cell.reconstruct\n", + " Number of Ligands = 3\n", + " Number of Metals = 1\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 1\n", + " Formula = Mg\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 2\n", + " Spin = 1\n", + " Smiles = [Mg+2]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 13\n", + " Formula = H8-C4-O\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 0\n", + " Spin = 1\n", + " Smiles = [H]C1([H])OC([H])([H])C([H])([H])C1([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 1\n", + " Formula = Mg\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 2\n", + " Spin = 1\n", + " Smiles = [Mg+2]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 15\n", + " Formula = H5-C7-O2-Fe\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = ['[H]C1=C([H])[C-]([H])C([H])=C1[H]', '[C-]#[O+]', '[C-]#[O+]']\n", + " Origin = cell.reconstruct\n", + " Number of Ligands = 3\n", + " Number of Metals = 1\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 1\n", + " Formula = Mg\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 2\n", + " Spin = 1\n", + " Smiles = [Mg+2]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 13\n", + " Formula = H8-C4-O\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 0\n", + " Spin = 1\n", + " Smiles = [H]C1([H])OC([H])([H])C([H])([H])C1([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 1\n", + " Formula = Mg\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 2\n", + " Spin = 1\n", + " Smiles = [Mg+2]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 13\n", + " Formula = H8-C4-O\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 0\n", + " Spin = 1\n", + " Smiles = [H]C1([H])OC([H])([H])C([H])([H])C1([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 72\n", + " Formula = H41-C29-N2\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = [H]C(C(=Nc1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])=C([N-]c1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 15\n", + " Formula = H5-C7-O2-Fe\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = ['[H]C1=C([H])[C-]([H])C([H])=C1[H]', '[C-]#[O+]', '[C-]#[O+]']\n", + " Origin = cell.reconstruct\n", + " Number of Ligands = 3\n", + " Number of Metals = 1\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 72\n", + " Formula = H41-C29-N2\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = [H]C(C(=Nc1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])=C([N-]c1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 13\n", + " Formula = H8-C4-O\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = 0\n", + " Spin = 1\n", + " Smiles = [H]C1([H])OC([H])([H])C([H])([H])C1([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 72\n", + " Formula = H41-C29-N2\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = [H]C(C(=Nc1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])=C([N-]c1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 15\n", + " Formula = H5-C7-O2-Fe\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = ['[H]C1=C([H])[C-]([H])C([H])=C1[H]', '[C-]#[O+]', '[C-]#[O+]']\n", + " Origin = cell.reconstruct\n", + " Number of Ligands = 3\n", + " Number of Metals = 1\n", + " ---------------------------------------------------,\n", + " ------------- Cell2mol MOLECULE Object --------------\n", + " Version = 2.0\n", + " Type = specie\n", + " Sub-Type = molecule\n", + " Number of Atoms = 72\n", + " Formula = H41-C29-N2\n", + " Covalent Radii Factor = 1.3\n", + " Metal Radii Factor = 1.0\n", + " Has Adjacency Matrix = YES\n", + " Total Charge = -1\n", + " Spin = 1\n", + " Smiles = [H]C(C(=Nc1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H])=C([N-]c1c(C([H])(C([H])([H])[H])C([H])([H])[H])c([H])c([H])c([H])c1C([H])(C([H])([H])[H])C([H])([H])[H])C([H])([H])[H]\n", + " Origin = cell.reconstruct\n", + " ---------------------------------------------------]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cell = unitcell\n", + "cell.moleclist\n", + "# print(len(cell.moleculeist))\n", + "# for specie in cell.species_list:\n", + "# # for specie in refcell.unique_species:\n", + "\n", + "# if hasattr(specie, \"possible_cs\"):\n", + "# print(specie.formula, specie.subtype, specie.unique_index, specie.possible_cs, [p.subtype for p in specie.parents])\n", + "# else:\n", + "# print(specie.formula, specie.subtype, specie.unique_index, \"No possible cs\", [p.subtype for p in specie.parents])\n" + ] + }, { "cell_type": "code", "execution_count": 8, diff --git a/cell2mol/test/check_radius.ipynb b/cell2mol/test/check_radius.ipynb index a8c5eb21..948ef074 100644 --- a/cell2mol/test/check_radius.ipynb +++ b/cell2mol/test/check_radius.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -51,6 +51,351 @@ "1.3*(elemdatabase.CovalentRadius3[\"Fe\"] + elemdatabase.CovalentRadius3[\"N\"])" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.63" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elemdatabase.CovalentRadius3[\"Y\"] + elemdatabase.CovalentRadius3[\"C\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import defaultdict\n", + "\n", + "from cell2mol.elementdata import ElementData\n", + "from cell2mol.connectivity import labels2formula\n", + "elemdatabase = ElementData()\n", + "\n", + "###############################\n", + "#### RUBEN Changes for V14 ####\n", + "###############################\n", + "valence_electrons = []\n", + "for i in elemdatabase.elementsym:\n", + " try:\n", + " valence_electrons.append(\n", + " elemdatabase.valenceelectrons[elemdatabase.elementsym[i]]\n", + " )\n", + " except KeyError:\n", + " continue\n", + "\n", + "atomic_valence_electrons = dict(zip(elemdatabase.elementsym, valence_electrons))\n", + "\n", + "\n", + "def get_atomic_valences(k):\n", + " symb = elemdatabase.elementsym[k]\n", + " block = elemdatabase.elementblock[symb]\n", + " group = elemdatabase.elementgroup[symb]\n", + " period = elemdatabase.elementperiod[symb]\n", + " ave = elemdatabase.valenceelectrons[symb]\n", + " if k == 5: # B\n", + " return [3, 4]\n", + " if k == 7: # N\n", + " return [3, 4]\n", + " if k == 8: # O\n", + " return [2, 1, 3]\n", + " if k == 15: # P\n", + " return [6, 5, 3] # [5,4,3]\n", + " if k == 33: # As\n", + " return [6, 5, 3] # [5,4,3]\n", + " if k == 51: # Sb\n", + " return [6, 5, 3] # [5,4,3]\n", + " if k in [16, 34]: # S, Se\n", + " return [6, 3, 2, 1] # [6,4,2]\n", + " if k in [17]: # Cl\n", + " return [1, 7]\n", + " if k in [53]: # I\n", + " return [1, 2]\n", + " if block == \"s\" and period == 1:\n", + " av = 2 - ave\n", + " elif block == \"s\" and period != 1:\n", + " av = ave\n", + " elif group == 18:\n", + " av = 0\n", + " elif block == \"p\" and group != 18:\n", + " av = 8 - ave\n", + " elif block in \"d\":\n", + " av = 20 - ave\n", + " else:\n", + " av = 1\n", + " return [av]\n", + "\n", + "\n", + "atomic_valence = defaultdict(list)\n", + "for k in elemdatabase.elementsym:\n", + " try:\n", + " # print(f\"Getting atomic valence for element number {k}={elemdatabase.elementsym[k]}\")\n", + " av = get_atomic_valences(k)\n", + " atomic_valence[k].extend(av)\n", + " # print(f\"Got atomic valence {atomic_valence[k]} as a result.\")\n", + " except KeyError:\n", + " continue" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Li\n", + "Na\n", + "K\n", + "Rb\n", + "Cs\n", + "Fr\n" + ] + } + ], + "source": [ + "for k in elemdatabase.elementname:\n", + " try:\n", + " if elemdatabase.elementgroup[k]==1 and k != \"H\" and k != \"D\":\n", + " print(k)\n", + " except :\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Al\n", + "Ga\n", + "Ge\n", + "In\n", + "Sn\n", + "Sb\n", + "Tl\n", + "Pb\n", + "Bi\n", + "Na\n", + "Mg\n", + "K\n", + "Ca\n", + "Sr\n", + "Ba\n" + ] + } + ], + "source": [ + "elem_list = [\"Al\", \"Ga\", \"Ge\", \"In\", \"Sn\", \"Sb\", \"Tl\", \"Pb\", \"Bi\", \"Na\", \"Mg\", \"K\", \"Ca\", \"Sr\", \"Ba\"]\n", + "for elem in elem_list :\n", + " print(elem)\n", + " # print(elem, elemdatabase.CovalentRadius3[elem], elemdatabase.elementname[elem], atomic_valence[elemdatabase.elementnr[elem]])" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Is ring compound (square ring)? False\n", + "Is ring compound (linear chain)? False\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import networkx as nx\n", + "\n", + "def is_haptic_ring(adjacency_matrix):\n", + " # Convert adjacency matrix to a NetworkX graph\n", + " G = nx.from_numpy_array(np.array(adjacency_matrix))\n", + " \n", + " # Check if the graph is connected\n", + " if not nx.is_connected(G):\n", + " return False # If not connected, can't form a single ring\n", + " \n", + " # Check if all nodes have degree 2\n", + " for node, degree in G.degree():\n", + " if degree != 2:\n", + " return False # If any node doesn't have degree 2, it's not a simple ring\n", + "\n", + " # Check if the graph has exactly one cycle\n", + " cycles = list(nx.simple_cycles(nx.DiGraph(G)))\n", + " if len(cycles) == 1 and len(cycles[0]) == G.number_of_nodes():\n", + " return True # The graph is a simple ring compound\n", + "\n", + " return False # Otherwise, not a ring compound\n", + "\n", + "\n", + "# Example Usage:\n", + "# Adjacency matrix for a ring of 4 atoms (square)\n", + "adj_matrix_ring = [\n", + " [0, 1, 0, 1],\n", + " [1, 0, 1, 0],\n", + " [0, 1, 0, 1],\n", + " [1, 0, 1, 0],\n", + "]\n", + "\n", + "# Adjacency matrix for a chain (not a ring)\n", + "adj_matrix_chain = [\n", + " [0, 1, 0, 0],\n", + " [1, 0, 1, 0],\n", + " [0, 1, 0, 1],\n", + " [0, 0, 1, 0],\n", + "]\n", + "\n", + "print(\"Is ring compound (square ring)?\", is_ring_compound(adj_matrix_ring)) # True\n", + "print(\"Is ring compound (linear chain)?\", is_ring_compound(adj_matrix_chain)) # False\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7890000000000001" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "3.419 - 2.63" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.419" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1.3*(elemdatabase.CovalentRadius3[\"Y\"] + elemdatabase.CovalentRadius3[\"C\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3.71" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elemdatabase.CovalentRadius3[\"Nd\"] + elemdatabase.CovalentRadius3[\"Sc\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.1130000000000004" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "4.823 - (elemdatabase.CovalentRadius3[\"Nd\"] + elemdatabase.CovalentRadius3[\"Sc\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.7" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "elemdatabase.CovalentRadius3[\"Sc\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4.823" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1.3*(elemdatabase.CovalentRadius3[\"Nd\"] + elemdatabase.CovalentRadius3[\"Sc\"])" + ] + }, { "cell_type": "code", "execution_count": 12, diff --git a/cell2mol/unitcell.py b/cell2mol/unitcell.py index 8b571a8a..0bad3a2d 100644 --- a/cell2mol/unitcell.py +++ b/cell2mol/unitcell.py @@ -8,6 +8,7 @@ from cell2mol.new_c2m_module import cell2mol from cell2mol.new_charge_assignment import assign_charge_to_specie from cell2mol.other import handle_error +from cell2mol.read_write import print_refmoleclist, print_unique_species, print_moleclist import copy # Constants VERSION = "2.0" @@ -127,6 +128,9 @@ def perform_cell2mol(newcell, refcell, sym_ops, cell_fname, ref_cell_fname, debu refcell.assign_spin(debug=debug) refcell.create_bonds(debug=debug) refcell.save(ref_cell_fname) + print_refmoleclist(newcell) + print_unique_species(newcell) + print_moleclist(newcell) newcell.save(cell_fname) diff --git a/cell2mol/xyz2mol.py b/cell2mol/xyz2mol.py index e07fb581..ce14e97a 100644 --- a/cell2mol/xyz2mol.py +++ b/cell2mol/xyz2mol.py @@ -542,8 +542,8 @@ def AC2BO(AC, atoms, charge, allow_charged_fragments=True, use_graph=True): best_BO = AC.copy() BO_is_OK_list = [] - print(f"AC2BO: {formula=} {len(valences_list)=}") - max_count = 1000 + max_count = max(1000, int(len(valences_list)*0.3)) + print(f"AC2BO: {formula=} {len(valences_list)=} {max_count=}") count = 0 for valences in valences_list: