Skip to content

Commit

Permalink
Debugging KeyError for water missing H and for Deuterium
Browse files Browse the repository at this point in the history
  • Loading branch information
choglass committed Nov 13, 2024
1 parent 2ec3135 commit 57f53b6
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 49 deletions.
2 changes: 1 addition & 1 deletion cell2mol/charge_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,7 @@ def get_protonation_states_specie(specie: object, debug: int=0) -> list:
if ligand.NO_type == "Linear":
if debug >= 2: print(" GET_PROTONATION_STATES: Found Linear Nitrosyl")
elemlist[idx] = "O"
addedlist[idx] = 2
addedlist[idx] = 1
metal_electrons[idx] = 1
elif ligand.NO_type == "Bent":
if debug >= 2: print(" GET_PROTONATION_STATES: Found Bent Nitrosyl")
Expand Down
13 changes: 6 additions & 7 deletions cell2mol/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1334,14 +1334,12 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor:
refcell.get_subtype("reference")
# Get reference molecules
blocklist = split_species(ref_labels, ref_pos, cov_factor=cov_factor)
print(blocklist)
self.refmoleclist = []
for b in blocklist:
mol_labels = extract_from_list(b, ref_labels, dimension=1)
mol_coord = extract_from_list(b, ref_pos, dimension=1)
mol_frac_coord = extract_from_list(b, ref_fracs, dimension=1)
newmolec = molecule(mol_labels, mol_coord, mol_frac_coord)
print(newmolec)
newmolec.add_parent(self, indices=b)
newmolec.add_parent(refcell, indices=b)
newmolec.set_adjacency_parameters(cov_factor, metal_factor)
Expand All @@ -1354,20 +1352,20 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor:

if debug >= 0: print(f"GETREFS: found {len(self.refmoleclist)} reference molecules")
if debug >= 0: print(f"GETREFS:", [ref.formula for ref in self.refmoleclist])
if debug >= 0: print(f"GETREFS: {self.refmoleclist}")
# Checks for isolated atoms, and retrieves warning if there is any. Except if it is H, halogen (group 17) or alkalyne (group 2)
isgood = True
for ref in self.refmoleclist:
if ref.natoms == 1:
label = ref.atoms[0].label
group = elemdatabase.elementgroup[label]
if (group == 1 or group == 2 or group == 17) and label != "H": pass
else:
if label == "H" or label == "D":
isgood = False
else: #(group == 1 or group == 2 or group == 17)
if debug >= 0: print(f"GETREFS: found ref molecule with only one atom {ref.labels}")

# If all good, then works with the reference molecules
if isgood:
self.has_isolated_H = False
for ref in self.refmoleclist:
if debug >= 0: print(f"GETREFS: working with {ref.formula}")
if ref.iscomplex:
Expand All @@ -1377,8 +1375,9 @@ def get_reference_molecules(self, ref_labels: list, ref_fracs: list, cov_factor:
for met in ref.metals:
met.get_coordination_geometry(debug=debug)
met.get_coord_sphere_formula()
if isgood: self.has_isolated_H = False
else: self.has_isolated_H = True
else:
self.has_isolated_H = True

return self.refmoleclist

#######################################################
Expand Down
32 changes: 16 additions & 16 deletions cell2mol/missingH.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,15 @@ def check_missingH(refmoleclist: list, debug: int=0):
# List of Metal Atoms for which O atoms might appear connected directly.
Exceptions_for_CoordWater = ["Re", "V", "Mo", "W", "Fe"]

if debug >= 2: print("")
if debug >= 2: print("##################")
if debug >= 2: print("Checking Missing H")
if debug >= 2: print("##################")
if debug >= 1: print("")
if debug >= 1: print("##################")
if debug >= 1: print("Checking Missing H")
if debug >= 1: print("##################")
for idx, ref in enumerate(refmoleclist):
if not ref.iscomplex:
if ref.natoms == 1 and "O" in ref.labels:
Missing_H_in_CoordWater = True
if debug >= 2: print(f"WARNING found isolated O atom in the cell. This tends to be a water with missing H, so stopping")
if debug >= 1: print(f"WARNING found isolated O atom in the cell. This tends to be a water with missing H, so stopping")
else:
for kdx, a in enumerate(ref.atoms):
if not hasattr(a,"adjacency"): continue
Expand All @@ -133,19 +133,19 @@ def check_missingH(refmoleclist: list, debug: int=0):
for label, coord in zip(bonded_atom_labels, bonded_atom_coord):
print("Dist", f"{a.label}-{label}", get_dist(a.coord, coord))
if ismissingH:
if debug >= 2: print("")
if debug >= 2: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {ref.labels}")
if debug >= 2: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms")
if debug >= 2: print(report)
if debug >= 1: print("")
if debug >= 1: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {ref.labels}")
if debug >= 1: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms")
if debug >= 1: print(report)
Missing_H_in_C = True
else:
for jdx, lig in enumerate(ref.ligands):
if lig.natoms == 1 and "O" in lig.labels and lig.denticity <= 1:
if any(m.label in Exceptions_for_CoordWater for m in lig.metals): pass
else:
Missing_H_in_CoordWater = True
if debug >= 2: print("")
if debug >= 2: print("WARNING in Missing H function for ligand",lig.natoms,lig.labels)
if debug >= 1: print("")
if debug >= 1: print("WARNING in Missing H function for ligand",lig.natoms,lig.labels)
else:
for kdx, a in enumerate(lig.atoms):
if a.label == "C" and a.mconnec == 0:
Expand All @@ -158,14 +158,14 @@ def check_missingH(refmoleclist: list, debug: int=0):
if debug >= 2: print("Adjacency", a.adjacency, bonded_atom_labels)
ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord)
if ismissingH:
if debug >= 2: print("")
if debug >= 2: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {jdx}, {lig.labels}")
if debug >= 2: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms")
if debug >= 2: print(report)
if debug >= 1: print("")
if debug >= 1: print(f"WARNING in Missing H function for: {ref.type}, {idx}, {jdx}, {lig.labels}")
if debug >= 1: print(f"C Atom {kdx} {a.get_parent_index('molecule')} has missing H atoms")
if debug >= 1: print(report)
Missing_H_in_C = True

if Missing_H_in_C or Missing_H_in_CoordWater: Warning = True
if not Warning:
if debug >= 2: print("Not a Single Molecule has Missing H atoms (apparently)")
if debug >= 1: print("Not a Single Molecule has Missing H atoms (apparently)")

return Warning, ismissingH, Missing_H_in_C, Missing_H_in_CoordWater
45 changes: 29 additions & 16 deletions cell2mol/new_cell_reconstruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
from cell2mol.connectivity import split_species, count_species, compare_reference_indices
from cell2mol.cell_operations import translate
from itertools import combinations
# from cell2mol.read_write import writexyz
# import pickle
from cell2mol.elementdata import ElementData
elemdatabase = ElementData()


######################################################
def modify_cov_factor_due_to_H (refcell, debug: int=0):
Expand All @@ -16,11 +17,11 @@ def modify_cov_factor_due_to_H (refcell, debug: int=0):
refcell.check_missing_H(debug=debug)
else:
if debug >= 1: print(f"Initial covalent factor: {cov_factor=} before increasing")
while refcell.has_isolated_H :
while refcell.has_isolated_H and cov_factor < 1.5:
# Increase covalent factor for H atoms
cov_factor += 0.05
refcell.get_reference_molecules(refcell.labels, refcell.frac_coord, cov_factor=cov_factor, debug=0)
if debug >= 1: print(f"Covalent factor increases: {cov_factor=}")
if debug >= 1: print(f"Covalent factor increases: {cov_factor=}")
refcell.check_missing_H(debug=debug)
refcell.assess_errors(mode="hydrogens")
return refcell
Expand Down Expand Up @@ -63,16 +64,20 @@ def apply_symmetry_operations_reference (refcell, cell_vector, sym_ops, normaliz
new_structures = []
ref_labels = refcell.labels
fractional_coords = np.array(refcell.frac_coord)
# reference : ase atoms object
reference = Atoms(symbols=ref_labels, scaled_positions=fractional_coords, cell=cell_vector, pbc=pbc)

if "D" in ref_labels:
numbers = [elemdatabase.elementnr[elem] for elem in ref_labels] # Atoms object cannot handle Deuterium in the symbols

for rot, trans in zip(sym_ops[0], sym_ops[1]):
transformed_positions = np.dot(fractional_coords, rot.T)
transformed_positions += np.array(trans)
if normalize:
transformed_positions = np.remainder(transformed_positions, 1)
new = Atoms(symbols=ref_labels, scaled_positions=transformed_positions, cell=cell_vector)
if "D" in ref_labels:
new = Atoms(scaled_positions=transformed_positions, numbers=numbers, cell=cell_vector)
else:
new = Atoms(symbols=ref_labels, scaled_positions=transformed_positions, cell=cell_vector)

new_structures.append(new)

return new_structures
Expand Down Expand Up @@ -400,7 +405,7 @@ def fragments_reconstruct (subset_remaining_fragments, target_ref, cell_vector,
return list_of_found_molecules, list_of_bigger_fragments

######################################################
def get_updated_indices(sp_idx, new, cell_labels, cell_pos, cell_fracs, debug: int=0):
def get_updated_indices(sp_idx, ref_labels, new, cell_labels, cell_pos, cell_fracs, debug: int=0):
"""
sp_idx : index of the symmetry operation
new : ase atoms object by applying symmetry operations to the reference structure
Expand All @@ -409,11 +414,11 @@ def get_updated_indices(sp_idx, new, cell_labels, cell_pos, cell_fracs, debug: i
cell_fracs : list of fractional coordinates of the atoms in the unit cell
"""
indices_lists = []
new_labels = new.get_chemical_symbols()
# new_labels = new.get_chemical_symbols()
new_pos = new.get_positions()
new_fracs = new.get_scaled_positions()

for jdx, (n_l, n_p, n_f) in enumerate(zip(new_labels, new_pos, new_fracs)):
for jdx, (n_l, n_p, n_f) in enumerate(zip(ref_labels, new_pos, new_fracs)):
for kdx, (l, p, f) in enumerate(zip(cell_labels, cell_pos, cell_fracs)):
if n_l == l and np.allclose(n_p, p, atol=1e-4, rtol=1e-2):
if np.allclose(np.remainder(n_f, 1), np.remainder(f, 1), atol=1e-4, rtol=1e-2):
Expand All @@ -423,7 +428,7 @@ def get_updated_indices(sp_idx, new, cell_labels, cell_pos, cell_fracs, debug: i
return indices_lists

######################################################
def get_updated_indices_old (new, cell_labels, cell_pos, cell_fracs, all_found, debug: int=0):
def get_updated_indices_old (ref_labels, new, cell_labels, cell_pos, cell_fracs, all_found, debug: int=0):

# new structure : ase atoms object by applying symmetry operations to the reference structure
# Find the indices of the atoms in the unit cell that have the same cartisian coordinates as the atoms in the new structure
Expand All @@ -435,8 +440,9 @@ def get_updated_indices_old (new, cell_labels, cell_pos, cell_fracs, all_found,
updated = [i for i in found_indices if i not in all_found]

new_fracs = new.get_scaled_positions()
new_labels = new.get_chemical_symbols()

# new_labels = new.get_chemical_symbols()
new_labels = ref_labels

indices_in_ref = [ find_row_index_from_matrix(new_fracs, cell_fracs[u]) for u in updated ]
if debug >= 2:
print("Get indices in reference")
Expand Down Expand Up @@ -538,10 +544,17 @@ def determine_wrap_keywords_pbc (atoms,refcell, wrap_keywords, debug: int=0):
def reconstuct (refcell, newcell, sym_ops, debug: int=0):

cell_labels = newcell.labels
print(f"{cell_labels=}")
if "D" in cell_labels:
print("Deuterium is in the cell")
cell_pos = newcell.coord
cell_fracs = newcell.frac_coord
cell_vector = newcell.cell_vector


ref_labels = refcell.labels
print(f"{ref_labels=}")
if "D" in ref_labels:
print("Deuterium is in the reference")
cov_factor = refcell.refmoleclist[0].cov_factor
metal_factor = refcell.refmoleclist[0].metal_factor

Expand All @@ -555,7 +568,7 @@ def reconstuct (refcell, newcell, sym_ops, debug: int=0):

for idx, new in enumerate(new_structures):
print(f"\nApplying symmetry operations to reference {idx}")
indices_lists = get_updated_indices(idx, new, cell_labels, cell_pos, cell_fracs, debug=debug)
indices_lists = get_updated_indices(idx, ref_labels, new, cell_labels, cell_pos, cell_fracs, debug=debug)
print(f"{len(indices_lists)=}")

updated_lists = [i for i in indices_lists if i[1] not in all_found]
Expand Down
2 changes: 1 addition & 1 deletion cell2mol/new_charge_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def set_charge_state(reference, target, mode, debug: int=0):
target_data = target.get_parent_indices("reference")
index_map = {value: index for index, value in enumerate(target_data)}
sorted_indices = sorted(range(len(ref_data)), key=lambda i: index_map[ref_data[i]])

print(f"SET_CHARGE_STATE:{sorted_indices=}")
reordered_prot = temp_prot.reorder(sorted_indices)
# reordered_prot.coords = target.coord
if debug >=1 : print(f"SET_CHARGE_STATE:({target.subtype}) {target.formula} {reference.charge_state.uncorr_total_charge=} Reordered {sorted_indices=}")
Expand Down
22 changes: 14 additions & 8 deletions cell2mol/unitcell.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,28 @@ def process_unitcell(input_path, name, current_dir, debug=0):
newcell = create_unitcell_object(name, cell_labels, cell_pos, cell_fracs, cell_vector, cell_param, "unitcell")

perform_cell2mol(newcell, refcell, sym_ops, cell_fname, ref_cell_fname, debug)

# Handle error cases for the unit cell
if hasattr(newcell, 'error_case'):
error_fname = os.path.join(current_dir, f"unitcell_error_{newcell.error_case}.out")
with open(error_fname, "w") as error_output:
with redirect_stdout(error_output):
handle_error(newcell.error_case)
else:
logging.error("Error encountered while processing the reference cell")

# Handle error cases for the unit cell
if hasattr(newcell, 'error_case'):
error_fname = os.path.join(current_dir, f"unitcell_error_{newcell.error_case}.out")
with open(error_fname, "w") as error_output:
with redirect_stdout(error_output):
handle_error(newcell.error_case)

return newcell

def get_cell_parameters(structure):
"""Extracts cell parameters and symmetry operations from structure."""
wrap_keywords = {'pbc': True, 'center': (0.5, 0.5, 0.5)}
cell_labels = structure.get_chemical_symbols()
cell_labels = []
for l, n, m in zip(structure.get_chemical_symbols(), structure.get_atomic_numbers(), structure.get_masses()):
if n == 1 and (m > 2 or m== 2.01355) : # Deuterium
cell_labels.append("D")
else:
cell_labels.append(l)

cell_pos = structure.get_positions(wrap=True, **wrap_keywords)
cell_fracs = structure.get_scaled_positions()
cell_vector = structure.cell.array
Expand Down

0 comments on commit 57f53b6

Please sign in to comment.