From 57f53b67f0f831ba1dc5a01f2d08bd552a03dd24 Mon Sep 17 00:00:00 2001 From: my-name Date: Wed, 13 Nov 2024 17:58:23 +0100 Subject: [PATCH] Debugging KeyError for water missing H and for Deuterium --- cell2mol/charge_assignment.py | 2 +- cell2mol/classes.py | 13 ++++----- cell2mol/missingH.py | 32 ++++++++++---------- cell2mol/new_cell_reconstruction.py | 45 +++++++++++++++++++---------- cell2mol/new_charge_assignment.py | 2 +- cell2mol/unitcell.py | 22 +++++++++----- 6 files changed, 67 insertions(+), 49 deletions(-) diff --git a/cell2mol/charge_assignment.py b/cell2mol/charge_assignment.py index 108a9eaa..fbb67bb4 100644 --- a/cell2mol/charge_assignment.py +++ b/cell2mol/charge_assignment.py @@ -410,7 +410,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list: if ligand.NO_type == "Linear": if debug >= 2: print(" GET_PROTONATION_STATES: Found Linear Nitrosyl") elemlist[idx] = "O" - addedlist[idx] = 2 + addedlist[idx] = 1 metal_electrons[idx] = 1 elif ligand.NO_type == "Bent": if debug >= 2: print(" GET_PROTONATION_STATES: Found Bent Nitrosyl") diff --git a/cell2mol/classes.py b/cell2mol/classes.py index 9d48ab31..149a9ad7 100644 --- a/cell2mol/classes.py +++ b/cell2mol/classes.py @@ -1334,14 +1334,12 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor: refcell.get_subtype("reference") # Get reference molecules blocklist = split_species(ref_labels, ref_pos, cov_factor=cov_factor) - print(blocklist) self.refmoleclist = [] for b in blocklist: mol_labels = extract_from_list(b, ref_labels, dimension=1) mol_coord = extract_from_list(b, ref_pos, dimension=1) mol_frac_coord = extract_from_list(b, ref_fracs, dimension=1) newmolec = molecule(mol_labels, mol_coord, mol_frac_coord) - print(newmolec) newmolec.add_parent(self, indices=b) newmolec.add_parent(refcell, indices=b) newmolec.set_adjacency_parameters(cov_factor, metal_factor) @@ -1354,20 +1352,20 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor: if debug >= 0: print(f"GETREFS: found {len(self.refmoleclist)} reference molecules") if debug >= 0: print(f"GETREFS:", [ref.formula for ref in self.refmoleclist]) - if debug >= 0: print(f"GETREFS: {self.refmoleclist}") # Checks for isolated atoms, and retrieves warning if there is any. Except if it is H, halogen (group 17) or alkalyne (group 2) isgood = True for ref in self.refmoleclist: if ref.natoms == 1: label = ref.atoms[0].label group = elemdatabase.elementgroup[label] - if (group == 1 or group == 2 or group == 17) and label != "H": pass - else: + if label == "H" or label == "D": isgood = False + else: #(group == 1 or group == 2 or group == 17) if debug >= 0: print(f"GETREFS: found ref molecule with only one atom {ref.labels}") # If all good, then works with the reference molecules if isgood: + self.has_isolated_H = False for ref in self.refmoleclist: if debug >= 0: print(f"GETREFS: working with {ref.formula}") if ref.iscomplex: @@ -1377,8 +1375,9 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor: for met in ref.metals: met.get_coordination_geometry(debug=debug) met.get_coord_sphere_formula() - if isgood: self.has_isolated_H = False - else: self.has_isolated_H = True + else: + self.has_isolated_H = True + return self.refmoleclist ####################################################### diff --git a/cell2mol/missingH.py b/cell2mol/missingH.py index bc17098b..e409281e 100755 --- a/cell2mol/missingH.py +++ b/cell2mol/missingH.py @@ -108,15 +108,15 @@ def check_missingH(refmoleclist: list, debug: int=0): # List of Metal Atoms for which O atoms might appear connected directly. Exceptions_for_CoordWater = ["Re", "V", "Mo", "W", "Fe"] - if debug >= 2: print("") - if debug >= 2: print("##################") - if debug >= 2: print("Checking Missing H") - if debug >= 2: print("##################") + if debug >= 1: print("") + if debug >= 1: print("##################") + if debug >= 1: print("Checking Missing H") + if debug >= 1: print("##################") for idx, ref in enumerate(refmoleclist): if not ref.iscomplex: if ref.natoms == 1 and "O" in ref.labels: Missing_H_in_CoordWater = True - if debug >= 2: print(f"WARNING found isolated O atom in the cell. This tends to be a water with missing H, so stopping") + if debug >= 1: print(f"WARNING found isolated O atom in the cell. This tends to be a water with missing H, so stopping") else: for kdx, a in enumerate(ref.atoms): if not hasattr(a,"adjacency"): continue @@ -133,10 +133,10 @@ def check_missingH(refmoleclist: list, debug: int=0): for label, coord in zip(bonded_atom_labels, bonded_atom_coord): print("Dist", f"{a.label}-{label}", get_dist(a.coord, coord)) if ismissingH: - if debug >= 2: print("") - if debug >= 2: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {ref.labels}") - if debug >= 2: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms") - if debug >= 2: print(report) + if debug >= 1: print("") + if debug >= 1: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {ref.labels}") + if debug >= 1: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms") + if debug >= 1: print(report) Missing_H_in_C = True else: for jdx, lig in enumerate(ref.ligands): @@ -144,8 +144,8 @@ def check_missingH(refmoleclist: list, debug: int=0): if any(m.label in Exceptions_for_CoordWater for m in lig.metals): pass else: Missing_H_in_CoordWater = True - if debug >= 2: print("") - if debug >= 2: print("WARNING in Missing H function for ligand",lig.natoms,lig.labels) + if debug >= 1: print("") + if debug >= 1: print("WARNING in Missing H function for ligand",lig.natoms,lig.labels) else: for kdx, a in enumerate(lig.atoms): if a.label == "C" and a.mconnec == 0: @@ -158,14 +158,14 @@ def check_missingH(refmoleclist: list, debug: int=0): if debug >= 2: print("Adjacency", a.adjacency, bonded_atom_labels) ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord) if ismissingH: - if debug >= 2: print("") - if debug >= 2: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {jdx}, {lig.labels}") - if debug >= 2: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms") - if debug >= 2: print(report) + if debug >= 1: print("") + if debug >= 1: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {jdx}, {lig.labels}") + if debug >= 1: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms") + if debug >= 1: print(report) Missing_H_in_C = True if Missing_H_in_C or Missing_H_in_CoordWater: Warning = True if not Warning: - if debug >= 2: print("Not a Single Molecule has Missing H atoms (apparently)") + if debug >= 1: print("Not a Single Molecule has Missing H atoms (apparently)") return Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater diff --git a/cell2mol/new_cell_reconstruction.py b/cell2mol/new_cell_reconstruction.py index 8a6cb5a6..ebbc3037 100644 --- a/cell2mol/new_cell_reconstruction.py +++ b/cell2mol/new_cell_reconstruction.py @@ -6,8 +6,9 @@ from cell2mol.connectivity import split_species, count_species, compare_reference_indices from cell2mol.cell_operations import translate from itertools import combinations -# from cell2mol.read_write import writexyz -# import pickle +from cell2mol.elementdata import ElementData +elemdatabase = ElementData() + ###################################################### def modify_cov_factor_due_to_H (refcell, debug: int=0): @@ -16,11 +17,11 @@ def modify_cov_factor_due_to_H (refcell, debug: int=0): refcell.check_missing_H(debug=debug) else: if debug >= 1: print(f"Initial covalent factor: {cov_factor=} before increasing") - while refcell.has_isolated_H : + while refcell.has_isolated_H and cov_factor < 1.5: # Increase covalent factor for H atoms cov_factor += 0.05 refcell.get_reference_molecules(refcell.labels, refcell.frac_coord, cov_factor=cov_factor, debug=0) - if debug >= 1: print(f"Covalent factor increases: {cov_factor=}") + if debug >= 1: print(f"Covalent factor increases: {cov_factor=}") refcell.check_missing_H(debug=debug) refcell.assess_errors(mode="hydrogens") return refcell @@ -63,16 +64,20 @@ def apply_symmetry_operations_reference (refcell, cell_vector, sym_ops, normaliz new_structures = [] ref_labels = refcell.labels fractional_coords = np.array(refcell.frac_coord) - - # reference : ase atoms object - reference = Atoms(symbols=ref_labels, scaled_positions=fractional_coords, cell=cell_vector, pbc=pbc) + + if "D" in ref_labels: + numbers = [elemdatabase.elementnr[elem] for elem in ref_labels] # Atoms object cannot handle Deuterium in the symbols for rot, trans in zip(sym_ops[0], sym_ops[1]): transformed_positions = np.dot(fractional_coords, rot.T) transformed_positions += np.array(trans) if normalize: transformed_positions = np.remainder(transformed_positions, 1) - new = Atoms(symbols=ref_labels, scaled_positions=transformed_positions, cell=cell_vector) + if "D" in ref_labels: + new = Atoms(scaled_positions=transformed_positions, numbers=numbers, cell=cell_vector) + else: + new = Atoms(symbols=ref_labels, scaled_positions=transformed_positions, cell=cell_vector) + new_structures.append(new) return new_structures @@ -400,7 +405,7 @@ def fragments_reconstruct (subset_remaining_fragments, target_ref, cell_vector, return list_of_found_molecules, list_of_bigger_fragments ###################################################### -def get_updated_indices(sp_idx, new, cell_labels, cell_pos, cell_fracs, debug: int=0): +def get_updated_indices(sp_idx, ref_labels, new, cell_labels, cell_pos, cell_fracs, debug: int=0): """ sp_idx : index of the symmetry operation new : ase atoms object by applying symmetry operations to the reference structure @@ -409,11 +414,11 @@ def get_updated_indices(sp_idx, new, cell_labels, cell_pos, cell_fracs, debug: i cell_fracs : list of fractional coordinates of the atoms in the unit cell """ indices_lists = [] - new_labels = new.get_chemical_symbols() + # new_labels = new.get_chemical_symbols() new_pos = new.get_positions() new_fracs = new.get_scaled_positions() - for jdx, (n_l, n_p, n_f) in enumerate(zip(new_labels, new_pos, new_fracs)): + for jdx, (n_l, n_p, n_f) in enumerate(zip(ref_labels, new_pos, new_fracs)): for kdx, (l, p, f) in enumerate(zip(cell_labels, cell_pos, cell_fracs)): if n_l == l and np.allclose(n_p, p, atol=1e-4, rtol=1e-2): if np.allclose(np.remainder(n_f, 1), np.remainder(f, 1), atol=1e-4, rtol=1e-2): @@ -423,7 +428,7 @@ def get_updated_indices(sp_idx, new, cell_labels, cell_pos, cell_fracs, debug: i return indices_lists ###################################################### -def get_updated_indices_old (new, cell_labels, cell_pos, cell_fracs, all_found, debug: int=0): +def get_updated_indices_old (ref_labels, new, cell_labels, cell_pos, cell_fracs, all_found, debug: int=0): # new structure : ase atoms object by applying symmetry operations to the reference structure # Find the indices of the atoms in the unit cell that have the same cartisian coordinates as the atoms in the new structure @@ -435,8 +440,9 @@ def get_updated_indices_old (new, cell_labels, cell_pos, cell_fracs, all_found, updated = [i for i in found_indices if i not in all_found] new_fracs = new.get_scaled_positions() - new_labels = new.get_chemical_symbols() - + # new_labels = new.get_chemical_symbols() + new_labels = ref_labels + indices_in_ref = [ find_row_index_from_matrix(new_fracs, cell_fracs[u]) for u in updated ] if debug >= 2: print("Get indices in reference") @@ -538,10 +544,17 @@ def determine_wrap_keywords_pbc (atoms,refcell, wrap_keywords, debug: int=0): def reconstuct (refcell, newcell, sym_ops, debug: int=0): cell_labels = newcell.labels + print(f"{cell_labels=}") + if "D" in cell_labels: + print("Deuterium is in the cell") cell_pos = newcell.coord cell_fracs = newcell.frac_coord cell_vector = newcell.cell_vector - + + ref_labels = refcell.labels + print(f"{ref_labels=}") + if "D" in ref_labels: + print("Deuterium is in the reference") cov_factor = refcell.refmoleclist[0].cov_factor metal_factor = refcell.refmoleclist[0].metal_factor @@ -555,7 +568,7 @@ def reconstuct (refcell, newcell, sym_ops, debug: int=0): for idx, new in enumerate(new_structures): print(f"\nApplying symmetry operations to reference {idx}") - indices_lists = get_updated_indices(idx, new, cell_labels, cell_pos, cell_fracs, debug=debug) + indices_lists = get_updated_indices(idx, ref_labels, new, cell_labels, cell_pos, cell_fracs, debug=debug) print(f"{len(indices_lists)=}") updated_lists = [i for i in indices_lists if i[1] not in all_found] diff --git a/cell2mol/new_charge_assignment.py b/cell2mol/new_charge_assignment.py index 093d4a71..66eef9a2 100644 --- a/cell2mol/new_charge_assignment.py +++ b/cell2mol/new_charge_assignment.py @@ -141,7 +141,7 @@ def set_charge_state(reference, target, mode, debug: int=0): target_data = target.get_parent_indices("reference") index_map = {value: index for index, value in enumerate(target_data)} sorted_indices = sorted(range(len(ref_data)), key=lambda i: index_map[ref_data[i]]) - + print(f"SET_CHARGE_STATE:{sorted_indices=}") reordered_prot = temp_prot.reorder(sorted_indices) # reordered_prot.coords = target.coord if debug >=1 : print(f"SET_CHARGE_STATE:({target.subtype}) {target.formula} {reference.charge_state.uncorr_total_charge=} Reordered {sorted_indices=}") diff --git a/cell2mol/unitcell.py b/cell2mol/unitcell.py index cb90ee2c..68914b5a 100644 --- a/cell2mol/unitcell.py +++ b/cell2mol/unitcell.py @@ -40,22 +40,28 @@ def process_unitcell(input_path, name, current_dir, debug=0): newcell = create_unitcell_object(name, cell_labels, cell_pos, cell_fracs, cell_vector, cell_param, "unitcell") perform_cell2mol(newcell, refcell, sym_ops, cell_fname, ref_cell_fname, debug) + + # Handle error cases for the unit cell + if hasattr(newcell, 'error_case'): + error_fname = os.path.join(current_dir, f"unitcell_error_{newcell.error_case}.out") + with open(error_fname, "w") as error_output: + with redirect_stdout(error_output): + handle_error(newcell.error_case) else: logging.error("Error encountered while processing the reference cell") - # Handle error cases for the unit cell - if hasattr(newcell, 'error_case'): - error_fname = os.path.join(current_dir, f"unitcell_error_{newcell.error_case}.out") - with open(error_fname, "w") as error_output: - with redirect_stdout(error_output): - handle_error(newcell.error_case) - return newcell def get_cell_parameters(structure): """Extracts cell parameters and symmetry operations from structure.""" wrap_keywords = {'pbc': True, 'center': (0.5, 0.5, 0.5)} - cell_labels = structure.get_chemical_symbols() + cell_labels = [] + for l, n, m in zip(structure.get_chemical_symbols(), structure.get_atomic_numbers(), structure.get_masses()): + if n == 1 and (m > 2 or m== 2.01355) : # Deuterium + cell_labels.append("D") + else: + cell_labels.append(l) + cell_pos = structure.get_positions(wrap=True, **wrap_keywords) cell_fracs = structure.get_scaled_positions() cell_vector = structure.cell.array