Skip to content

Commit

Permalink
Debugging fragments_reconstruct YOBCUO
Browse files Browse the repository at this point in the history
  • Loading branch information
choglass committed Apr 11, 2024
1 parent 2ed756e commit 0cfeccf
Show file tree
Hide file tree
Showing 6 changed files with 4,469 additions and 26 deletions.
17 changes: 15 additions & 2 deletions cell2mol/cell_reconstruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,12 @@ def fragments_reconstruct(moleclist: list, fraglist: list, Hlist: list, refmolec
# Reconstruct Hydrogens with remaining Fragments
if len(remfrag) > 0 and len(Hlist) > 0:
print("FRAG_RECONSTRUCT.", len(fraglist), "molecules submitted to sequential with All")
for frag in fraglist:
print(frag.formula, frag.subtype, frag.labels)
finalmols, remfrag = sequential(fraglist, refmoleclist, cellvec, factor, metal_factor, "All", debug)
moleclist.extend(finalmols)
print(f"{finalmols=}")
print(f"{remfrag=}")
if len(remfrag) > 0: Warning = True; print("FRAG_RECONSTRUCT. Remaining after Hydrogen reconstruction",remfrag)
else: Warning = False; print("FRAG_RECONSTRUCT. No remaining Molecules after Hydrogen reconstruction")
elif len(remfrag) > 0 and len(Hlist) == 0:
Expand Down Expand Up @@ -343,7 +347,13 @@ def sequential(fragmentlist: list, refmoleclist: list, cellvec: list, factor: fl
for i in range(0, len(list2)):
if i == Frag2_toallocate: sublist.append(list2[i])
elif i != Frag2_toallocate: keeplist2.append(list2[i])


print(f"sublist", len(sublist), [s.formula for s in sublist] )
print("list1", len(list1), [s.formula for s in list1])
print("list2", len(list2),[s.formula for s in list2])
print(f"keeplist1", len(keeplist1), [s.formula for s in keeplist1])
print(f"keeplist2", len(keeplist2), [s.formula for s in keeplist2])
print("")
#################
# This part evaluates that the fragments that are going to be combined, can form one of the reference molecules. The resulting number of atoms is used.
#################
Expand All @@ -357,7 +367,10 @@ def sequential(fragmentlist: list, refmoleclist: list, cellvec: list, factor: fl
# Here, the function "combine" is called. It will try cell translations of one fragment, and check whether it eventually combines with the second fragment into either a bigger fragment or a molecule
#################
goodlist, avglist, badlist = combine(sublist, refmoleclist, cellvec, threshold_tmat, factor, metal_factor, debug=debug)

print(f"{goodlist=}")
print(f"{avglist=}")
print(f"{badlist=}")

#################
# This part handles the results of combine
#################
Expand Down
90 changes: 90 additions & 0 deletions cell2mol/charge_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,96 @@ def arrange_data_for_reorder(reference: object, target: object, debug: int=0):
ref_data.append(data)
return ref_data, target_data

#######################################################
def set_charges_create_bonds (specie, unique_indices, unique_species, final_charge_distribution, debug):

spec = unique_species[specie.unique_index]
indices = [index for index, value in enumerate(unique_indices) if value == specie.unique_index]
target_charge = [final_charge_distribution[i] for i in indices][0]
if debug > 1: print(spec, indices, target_charge)

if (specie.subtype == "molecule" and specie.iscomplex == False) or (specie.subtype == "ligand"):
formula = specie.formula
charge_list = [cs.corr_total_charge for cs in spec.possible_cs]

elif specie.subtype == "metal":
formula = specie.label
charge_list = spec.possible_cs

if target_charge in charge_list:
if debug > 1: print(f"Target charge {target_charge} of {formula} exists in {charge_list}." )
else:
if debug > 1: print(f"ERROR: Target charge {target_charge} of {formula} does not exist in {charge_list}." )
return None

if (specie.subtype == "molecule" and specie.iscomplex == False) or (specie.subtype == "ligand"):
print(specie.formula)
specie.get_protonation_states(debug=debug)
specie.get_possible_cs(debug=debug)
formula = specie.formula
charge_list = [cs.corr_total_charge for cs in specie.possible_cs]

if target_charge in charge_list:
if debug > 1: print(f"Target charge {target_charge} of {formula} exists in {charge_list}.")
idx = charge_list.index(target_charge)
cs = specie.possible_cs[idx]
#prot = cs.protonation
specie.set_charges(cs.corr_total_charge, cs.corr_atom_charges, cs.smiles, cs.rdkit_obj)
specie.create_bonds(debug=debug)
else:
if debug > 1: print(f"ERROR: Target charge {target_charge} of {formula} does not exist in {charge_list}." )
return None

elif specie.subtype == "metal":
print(specie.label)
specie.get_possible_cs(debug=debug)
formula = specie.label
charge_list = spec.possible_cs

if target_charge in charge_list:
if debug > 1: print(f"Target charge {target_charge} of {formula} exists in {charge_list}." )
idx = charge_list.index(target_charge)
cs = specie.possible_cs[idx]
specie.set_charge(cs)
else:
if debug > 1: print(f"ERROR: Target charge {target_charge} of {formula} does not exist in {charge_list}." )
return None
#######################################################
def prepare_mols_v4 (moleclist: list, unique_indices: list, unique_species: list, final_charge_distribution: list, debug: int=0):
count = 0
for mol in moleclist:
if mol.iscomplex == False:
set_charges_create_bonds(mol, unique_indices, unique_species, final_charge_distribution, debug)
count += 1

elif mol.iscomplex:
tmp_atcharge = np.zeros((mol.natoms))
tmp_smiles = []

for lig in mol.ligands:
set_charges_create_bonds(lig, unique_indices, unique_species, final_charge_distribution, debug)
count += 1

tmp_smiles.append(lig.smiles)
parent_indices = lig.get_parent_indices("molecule")
for kdx, a in enumerate(parent_indices):
tmp_atcharge[a] = lig.atomic_charges[kdx]

for met in mol.metals:
set_charges_create_bonds(met, unique_indices, unique_species, final_charge_distribution, debug)
count += 1
parent_index = met.get_parent_index("molecule")
tmp_atcharge[parent_index] = met.charge

mol.set_charges(int(sum(tmp_atcharge)), atomic_charges=tmp_atcharge, smiles=tmp_smiles)

if count != len(final_charge_distribution):
Warning = True
else:
Warning = False

return moleclist, Warning

#######################################################
def prepare_mols(moleclist: list, unique_indices: list, unique_species: list, selected_cs: list, final_charge_distribution: list, debug: int=0) -> Tuple[list, bool]:
# The charge and connectivity of a given specie in the unit cell is only determined for one representative case. i
Expand Down
7 changes: 5 additions & 2 deletions cell2mol/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from cell2mol.cell_reconstruction import classify_fragments, fragments_reconstruct
from cell2mol.cell_operations import cart2frac, frac2cart_fromparam
from cell2mol.charge_assignment import get_protonation_states_specie, get_possible_charge_state, get_metal_poscharges
from cell2mol.charge_assignment import balance_charge, prepare_unresolved, prepare_mols, correct_smiles_ligand
from cell2mol.charge_assignment import balance_charge, prepare_unresolved, prepare_mols, correct_smiles_ligand, prepare_mols_v4
from cell2mol.spin import assign_spin_metal, assign_spin_complexes
from cell2mol.other import extract_from_list, compute_centroid, get_dist, get_angle
from cell2mol.elementdata import ElementData
Expand Down Expand Up @@ -1421,12 +1421,15 @@ def assign_charges(self, debug: int=0) -> object:
self.error_empty_distrib = True
return None
else: # Only one possible charge distribution -> getcharge for the repeated species
self.error_multiple_distrib = False
self.error_empty_distrib = False
if debug >= 1:
print(f"\nFINAL Charge Distribution: {final_charge_distribution}\n")
print("#########################################")
print("Assigning Charges and Preparing Molecules")
print("#########################################")
self.moleclist, self.error_prepare_mols = prepare_mols(self.moleclist, self.unique_indices, self.unique_species, selected_cs, final_charge_distribution[0], debug=debug)
self.moleclist, self.error_prepare_mols = prepare_mols_v4 (self.moleclist, self.unique_indices, self.unique_species, final_charge_distribution[0], debug=debug)
# self.moleclist, self.error_prepare_mols = prepare_mols(self.moleclist, self.unique_indices, self.unique_species, selected_cs, final_charge_distribution[0], debug=debug)
if self.error_prepare_mols: return None # Error while preparing molecules

return self.moleclist
Expand Down
2 changes: 1 addition & 1 deletion cell2mol/missingH.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def check_missingH(refmoleclist: list, debug: int=0):
if a.label == "C" and a.mconnec == 0:
bonded_atom_coord = []
for adj in a.adjacency:
bonded_atom_coord.append(lig.coord[adj])
bonded_atom_coord.append(lig.get_parent("molecule").coord[adj])
ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord)
if ismissingH:
if debug >= 2: print("")
Expand Down
90 changes: 69 additions & 21 deletions cell2mol/test/test_one_by_one_INOVAL.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "48e219ea",
"metadata": {},
"outputs": [],
Expand All @@ -57,10 +57,10 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "09c5cef3",
"metadata": {
"scrolled": false
"scrolled": true
},
"outputs": [
{
Expand Down Expand Up @@ -609,21 +609,75 @@
"\n",
"# Reads reference molecules from info file, as well as labels and coordinates\n",
"labels, pos, ref_labels, ref_fracs, cellvec, cellparam = readinfo(infopath)\n",
"\n",
"# Initiates cell\n",
"newcell = cell(name, labels, pos, cellvec, cellparam)\n",
"# # Loads the reference molecules and checks_missing_H\n",
"# # TODO : reconstruct the unit cell without using reference molecules\n",
"# # TODO : reconstruct the unit cell using (only reconstruction of) reference molecules and Space group\n",
"newcell.get_reference_molecules(ref_labels, ref_fracs, debug=debug) \n",
"newcell.assess_errors()\n",
"# newcell.save(ref_cell_fname)\n",
"# ######################\n",
"# ### CALLS CELL2MOL ###\n",
"# ######################\n",
"# print(f\"ENTERING cell2mol with debug={debug}\")\n",
"# cell = cell2mol(newcell, reconstruction=True, charge_assignment=True, spin_assignment=True, debug=debug)\n",
"# cell.assess_errors()\n",
"# cell.save(cell_fname)"
"newcell.assess_errors()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "694fbb29",
"metadata": {},
"outputs": [],
"source": [
"from cell2mol.missingH import get_missingH_from_adjacency\n",
"Missing_H_in_C = False\n",
"Missing_H_in_CoordWater = False\n",
"ismissingH = False\n",
"Warning = False\n",
"\n",
"# List of Metal Atoms for which O atoms might appear connected directly.\n",
"Exceptions_for_CoordWater = [\"Re\", \"V\", \"Mo\", \"W\"]\n",
"\n",
"if debug >= 2: print(\"\")\n",
"if debug >= 2: print(\"##################\")\n",
"if debug >= 2: print(\"Checking Missing H\")\n",
"if debug >= 2: print(\"##################\")\n",
"for idx, ref in enumerate(newcell.refmoleclist):\n",
" if not ref.iscomplex:\n",
" if ref.natoms == 1 and \"O\" in ref.labels: \n",
" Missing_H_in_CoordWater = True\n",
" if debug >= 2: print(f\"WARNING found isolated O atom in the cell. This tends to be a water with missing H, so stopping\")\n",
" else:\n",
" for kdx, a in enumerate(ref.atoms):\n",
" if not hasattr(a,\"adjacency\"): continue \n",
" if a.label == \"C\":\n",
" bonded_atom_coord = []\n",
" for adj in a.adjacency:\n",
" bonded_atom_coord.append(ref.coord[adj])\n",
" ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord)\n",
" if ismissingH:\n",
" if debug >= 2: print(\"\")\n",
" if debug >= 2: print(f\"WARNING in Missing H function for: {ref.type}, {idx}, {ref.labels}\")\n",
" if debug >= 2: print(f\"C Atom {kdx} has missing H atoms\")\n",
" if debug >= 2: print(report)\n",
" Missing_H_in_C = True\n",
" else:\n",
" for jdx, lig in enumerate(ref.ligands):\n",
" if lig.natoms == 1 and \"O\" in lig.labels and lig.denticity <= 1:\n",
" if any(m.label in Exceptions_for_CoordWater for m in lig.metalatoms): pass\n",
" else:\n",
" Missing_H_in_CoordWater = True\n",
" if debug >= 2: print(\"\")\n",
" if debug >= 2: print(\"WARNING in Missing H function for ligand\",lig.natoms,lig.labels)\n",
" else:\n",
" for kdx, a in enumerate(lig.atoms):\n",
" if a.label == \"C\" and a.mconnec == 0:\n",
" bonded_atom_coord = []\n",
" print(a.label, a.adjacency, len(lig.coord))\n",
" for adj in a.adjacency:\n",
" bonded_atom_coord.append(lig.coord[adj])\n",
" ismissingH, report = get_missingH_from_adjacency(a.atnum, a.coord, bonded_atom_coord)\n",
" if ismissingH:\n",
" if debug >= 2: print(\"\")\n",
" if debug >= 2: print(f\"WARNING in Missing H function for: {ref.type}, {idx}, {jdx}, {lig.labels}\")\n",
" if debug >= 2: print(f\"Atom {kdx} has missing H atoms\")\n",
" if debug >= 2: print(report)\n",
" Missing_H_in_C = True"
]
},
{
Expand Down Expand Up @@ -1130,13 +1184,7 @@
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n",
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n",
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n",
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n",
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n",
"smiles='[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[N+](C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])(C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H])C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'\n",
"H36-C16-N\n",
Expand Down
Loading

0 comments on commit 0cfeccf

Please sign in to comment.