Skip to content

Commit

Permalink
Update balance_charge by including metal OS prediction model for Fe
Browse files Browse the repository at this point in the history
  • Loading branch information
choglass committed Nov 17, 2024
1 parent 2c4a76b commit 2c899a9
Show file tree
Hide file tree
Showing 7 changed files with 109 additions and 108 deletions.
Binary file added cell2mol/Fe_mono_m_ox_5486.pkl
Binary file not shown.
91 changes: 55 additions & 36 deletions cell2mol/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -1608,22 +1608,42 @@ def assign_charges (self, debug: int=0):
print("final_charges", final_charges)
if len(final_charge_distribution) > 1:
if debug >= 1: print("More than one Possible Distribution Found:", final_charge_distribution)
self.error_multiple_distrib = True
self.error_empty_distrib = False
pp_mols, pp_idx, pp_opt = prepare_unresolved(self.unique_indices, self.unique_species, final_charge_distribution, debug=debug)
self.data_for_postproc(pp_mols, pp_idx, pp_opt)
return # Stopping.
second_final_charge_distribution, second_final_charges = balance_charge(self.unique_indices, self.unique_species, predict=True, debug=debug)
print("second_final_charge_distribution", second_final_charge_distribution)
print("second_final_charges", second_final_charges)

if len(second_final_charge_distribution) == 1:
self.error_multiple_distrib = False
self.error_empty_distrib = False
final_charge_distribution = second_final_charge_distribution
final_charges = second_final_charges
else:
self.error_multiple_distrib = True
self.error_empty_distrib = False
return # Stopping.

elif len(final_charge_distribution) == 0: #
if debug >= 1: print("No valid Distribution Found", final_charge_distribution)
self.error_multiple_distrib = False
self.error_empty_distrib = True
return # Stopping.
second_final_charge_distribution, second_final_charges = balance_charge(self.unique_indices, self.unique_species, rare=True, debug=debug)
print("second_final_charge_distribution", second_final_charge_distribution)
print("second_final_charges", second_final_charges)

if len(second_final_charge_distribution) == 1:
self.error_multiple_distrib = False
self.error_empty_distrib = False
final_charge_distribution = second_final_charge_distribution
final_charges = second_final_charges

else:
self.error_multiple_distrib = False
self.error_empty_distrib = True
return # Stopping.

else: # Only one possible charge distribution -> getcharge for the repeated species
self.error_multiple_distrib = False
self.error_empty_distrib = False


if self.error_multiple_distrib == False and self.error_empty_distrib == False:
if debug >= 1:
print(f"\nFINAL Charge Distribution: {final_charge_distribution}\n")
print("#########################################")
Expand Down Expand Up @@ -1700,37 +1720,36 @@ def assign_charges (self, debug: int=0):
print("ASSIGN_CHARGES: Non-Complex", idx, mol.formula, mol.totcharge, mol.smiles)
#######################################################
def assign_charges_for_refcell(self, debug: int=0):

for specie in self.unique_species:
for idx, ref in enumerate(self.refmoleclist):
if ref.iscomplex:
for jdx, lig in enumerate(ref.ligands):
print(lig.unique_index)
print(specie.unique_index)
if lig.unique_index == specie.unique_index:
set_charge_state (specie, lig, mode=1, debug=debug)
for kdx, met in enumerate(ref.metals):
if met.unique_index == specie.unique_index:
met.set_charge(specie.charge)
else:
if ref.unique_index == specie.unique_index:
set_charge_state (specie, ref, mode=1, debug=debug)

for idx, ref in enumerate(self.refmoleclist):
if ref.iscomplex: prepare_mol(ref)

for idx, ref in enumerate(self.refmoleclist):
print(f"Refenrence Molecule {idx}: {ref.formula}")
print(f"ASSIGN_CHARGES: Refenrence Molecule {idx}: {ref.formula}")
if ref.iscomplex:
print("ASSIGN_CHARGES: Complex", idx, ref.formula, ref.totcharge)
for jdx, lig in enumerate(ref.ligands):
for specie in self.unique_species:
if specie.subtype == "ligand":
if lig.is_nitrosyl and specie.is_nitrosyl:
if lig.NO_type == specie.NO_type:
issame = True
else:
issmae = False
else:
issame = compare_species(lig, specie)
if issame:
set_charge_state (specie, lig, mode=1, debug=debug)
print(lig.formula, specie.formula, issame)
for met in ref.metals:
for specie in self.unique_species:
if specie.subtype == "metal":
issame = compare_metals(met, specie)
if issame:
met.set_charge(specie.charge)
print(met.formula, specie.formula, issame)
prepare_mol(ref)
print("ASSIGN_CHARGES: Ligand", idx, jdx, lig.formula, lig.totcharge, lig.smiles)
for kdx, met in enumerate(ref.metals):
print("ASSIGN_CHARGES: Metal", idx, kdx, met.formula, met.charge)
else:
for specie in self.unique_species:
if specie.subtype == "molecule":
issame = compare_species(ref, specie)
if issame:
set_charge_state (specie, ref, mode=1, debug=debug)
print(ref.formula, specie.formula, issame)
print("ASSIGN_CHARGES: Non-Complex", idx, ref.formula, ref.totcharge, ref.smiles)


#######################################################
def check_charge_neutrality(self, debug: int=0):
Expand Down
90 changes: 36 additions & 54 deletions cell2mol/new_charge_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,35 @@
import copy
from cell2mol.charge_assignment import protonation, get_charge, get_charge_manual
import itertools
import os
from cell2mol import __file__
from cell2mol.spin import generate_feature_vector
import pickle

#######################################################
def balance_charge(unique_indices: list, unique_species: list, debug: int=0) -> list:
def balance_charge(unique_indices: list, unique_species: list, rare: bool="False", predict: bool="False", debug: int=0) -> list:

# Function to Select the Best Charge Distribution for the unique species.
# It accepts multiple charge options for each molecule/ligand/metal (poscharge, etc...).
# NO: It should select the best one depending on whether the final metal charge makes sense or not.
# In some cases, can accept metal oxidation state = 0, if no other makes sense

all_possible_m_ox = [0, 1, 2, 3, 4, 5, 6, 7]
iserror = False
iterlist = []
for idx, spec in enumerate(unique_species):
toadd = []
if spec.subtype == "metal":
for tch in spec.possible_cs:
if rare == True:
rare_m_ox = [x for x in all_possible_m_ox if x not in spec.possible_cs]
print(f"RARE METAL OXIDATION STATES: {spec.formula} {rare_m_ox}")
for tch in rare_m_ox:
toadd.append(tch)
elif predict == True :
tch = predict_metal_ox(spec, debug=debug)
toadd.append(tch)
else:
for tch in spec.possible_cs:
toadd.append(tch)
else :
if len(spec.possible_cs) == 1:
toadd.append(spec.possible_cs[0].corr_total_charge)
Expand Down Expand Up @@ -60,9 +73,9 @@ def balance_charge(unique_indices: list, unique_species: list, debug: int=0) ->
return final_charge_distribution, final_charges
######################################################

def assign_charge_state_for_unique_species(unique_species, final_charges_tuple, debug: int=0):
for specie, final_charge in zip(unique_species, final_charges_tuple):
def assign_charge_state_for_unique_species(unique_species, final_charge_tuple, debug: int=0):

for specie, final_charge in zip(unique_species, final_charge_tuple):
print(specie.unique_index, specie.formula)
if (specie.subtype == "molecule" and specie.iscomplex == False) or (specie.subtype == "ligand"):
charge_list = [cs.corr_total_charge for cs in specie.possible_cs]
Expand All @@ -72,10 +85,10 @@ def assign_charge_state_for_unique_species(unique_species, final_charges_tuple,
# print(specie.charge_state.protonation)
specie.set_charges(cs.corr_total_charge, cs.corr_atom_charges, cs.smiles, cs.rdkit_obj)
elif specie.subtype == "metal" :
charge_list = specie.possible_cs
idx = charge_list.index(final_charge)
cs = specie.possible_cs[idx]
specie.set_charge(cs)
# charge_list = specie.possible_cs
# idx = charge_list.index(final_charge)
# cs = specie.possible_cs[idx]
specie.set_charge(final_charge)
for specie in unique_species:
if (specie.subtype == "molecule" and specie.iscomplex == False) or (specie.subtype == "ligand"):
print(specie.formula, specie.charge_state, specie.totcharge, specie.smiles)
Expand Down Expand Up @@ -342,8 +355,7 @@ def assign_charge_to_specie(specie, final_charge, debug: int=0):
if debug >= 1: print(specie.unique_index, specie.formula, specie.totcharge, specie.smiles)

elif specie.subtype == "metal":
idx = specie.possible_cs.index(final_charge)
specie.set_charge(specie.possible_cs[idx])
specie.set_charge(final_charge)
if debug >= 1: print(specie.unique_index, specie.formula, specie.charge)

######################################################
Expand All @@ -358,45 +370,15 @@ def validate_reference_molecules(self, debug):
else:
self.validate_non_complex_molecule(ref, debug)



def validate_complex_ligands(unique_species, ref, idx, debug):
"""Validate ligands within complex reference molecules."""
for jdx, lig in enumerate(ref.ligands):
for kdx, specie in enumerate(unique_species):
if specie.subtype == "ligand" and lig.formula == specie.formula:
issame = self.compare_and_set_charge(lig, specie, debug)
if not issame:
print("Check Error", f"{idx=}, {jdx=}, {kdx=}", lig.formula, specie.formula, issame)



def compare_and_set_charge(self, lig, specie, debug):
"""Compare ligands and species, setting charge if they match."""
issame = self.compare_species_or_nitrosyl(lig, specie)
if issame:
set_charge_state(specie, lig, mode=1, debug=debug)
return issame
def compare_species_or_nitrosyl(self, lig, specie):
"""Compare if two entities are the same, considering nitrosyl properties."""
if not hasattr(lig, "is_nitrosyl"):
lig.evaluate_as_nitrosyl()
if not hasattr(specie, "is_nitrosyl"):
specie.evaluate_as_nitrosyl()
if lig.is_nitrosyl and specie.is_nitrosyl:
return lig.NO_type == specie.NO_type
return compare_species(lig, specie)

def validate_complex_metals(self, ref, debug):
"""Validate metals within complex reference molecules."""
for met in ref.metals:
for specie in self.unique_species:
if specie.subtype == "metal" and compare_metals(met, specie):
met.set_charge(specie.charge)

def validate_non_complex_molecule(self, ref, debug):
"""Validate non-complex reference molecules."""
for specie in self.unique_species:
if specie.subtype == "molecule" and ref.formula == specie.formula:
if compare_species(ref, specie):
set_charge_state(specie, ref, mode=1, debug=debug)
######################################################
def predict_metal_ox (metal:object, debug: int=0) -> None:
model = "Fe_mono_m_ox_5486.pkl"
feature = generate_feature_vector (metal, target_prop = "m_ox", debug=debug)
path_rf = os.path.join( os.path.abspath(os.path.dirname(__file__)), model)
ramdom_forest = pickle.load(open(path_rf, 'rb'))
predictions = ramdom_forest.predict(feature)
m_ox_rf = predictions[0]
print(f"PREDICT_METAL_OS: metal OS of the metal {metal.label} is predicted as {m_ox_rf} using Random Forest model")

return m_ox_rf
######################################################
2 changes: 1 addition & 1 deletion cell2mol/random_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@

dataframe=argv[1]
metal = argv[2]
#mode = argv[3]
prop = argv[3]
mode = argv[4]


print("Sklearn version:", sklearn.__version__)
Expand Down
15 changes: 5 additions & 10 deletions cell2mol/spin.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,32 +75,27 @@ def generate_feature_vector (metal: object, target_prop: str, debug: int = 0) ->
Returns:
feature (np.ndarray): feature vector
"""
if debug > 1: print(f"GENERATE_feature_vector: {metal.label}")
if debug >= 1: print(f"GENERATE_feature_vector: {metal.label}")

elem_nr = elemdatabase.elementnr[metal.label]
m_ox = metal.charge
valence_elec = metal.get_valence_elec(metal.charge)
if debug > 1: print(f"GENERATE_feature_vector: {elem_nr=} {m_ox=} {valence_elec=}")

coord_group = metal.get_connected_groups(debug=debug)
coord_nr = metal.coord_nr
geom_nr = make_geom_list()[metal.coord_geometry]
if debug > 1: print(f"GENERATE_feature_vector: {metal.coord_nr=} {metal.coord_geometry=} {geom_nr=}")

rel_metal_radius = metal.rel_metal_radius
if debug > 1: print(f"GENERATE_feature_vector: {metal.rel_metal_radius=}")

coord_hapticty = [ group.is_haptic for group in coord_group ]
if any(coord_hapticty) : hapticity = 1
else : hapticity = 0
if debug > 1: print(f"GENERATE_feature_vector: {hapticity=}")

if target_prop == "m_ox":
feature = np.array([[elem_nr, coord_nr, geom_nr, rel_metal_radius, hapticity]])
if debug > 1: print(f"GENERATE_feature_vector: {feature=}")
if debug >= 1: print(f"GENERATE_feature_vector: {feature=}")
elif target_prop == "spin":
m_ox = metal.charge
valence_elec = metal.get_valence_elec(metal.charge)
feature = np.array([[elem_nr, m_ox, valence_elec, coord_nr, geom_nr, rel_metal_radius, hapticity]])
if debug > 1: print(f"GENERATE_feature_vector: {feature=}")
if debug >= 1: print(f"GENERATE_feature_vector: {feature=}")

return feature

Expand Down
17 changes: 11 additions & 6 deletions cell2mol/unitcell.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ase.io import read
from cell2mol.classes import cell
from cell2mol.new_c2m_module import cell2mol
from cell2mol.new_charge_assignment import assign_charge_state_for_unique_species, balance_charge
from cell2mol.new_charge_assignment import assign_charge_to_specie
from cell2mol.other import handle_error
import copy
# Constants
Expand Down Expand Up @@ -109,14 +109,19 @@ def perform_cell2mol(newcell, refcell, sym_ops, cell_fname, ref_cell_fname, debu
cell2mol_mode(newcell, refcell, sym_ops, mode, debug)

# Assign and balance charges
final_charge_distribution, final_charges = balance_charge(newcell.unique_indices, refcell.unique_species, debug=debug)
print(f"{final_charges=}")
refcell.unique_species = assign_charge_state_for_unique_species(newcell.unique_species, final_charges[0], debug=2)
for specie in newcell.unique_species:
for refspecie in refcell.unique_species:
if specie.unique_index == refspecie.unique_index:
if specie.subtype == "metal":
assign_charge_to_specie(refspecie, specie.charge, debug=debug)
else:
assign_charge_to_specie(refspecie, specie.totcharge, debug=debug)

for specie in refcell.unique_species:
if specie.subtype == "metal":
print("refcell.unique_species", specie.formula, specie.charge)
print("refcell.unique_species", specie.formula, specie.charge, specie.unique_index)
else:
print("refcell.unique_species", specie.formula, specie.totcharge)
print("refcell.unique_species", specie.formula, specie.totcharge, specie.unique_index)
# Finalize refcell properties and save both cell objects
refcell.assign_charges_for_refcell(debug=debug)
refcell.assign_spin(debug=debug)
Expand Down
2 changes: 1 addition & 1 deletion cell2mol/xyz_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
COV_FACTOR = 1.3
METAL_FACTOR = 1.0

def get_molecule (input_path, name, current_dir, debug=0):
def get_molecule (input_path, name, current_dir, debug=2):

molec_fname = os.path.join(current_dir, f"Molecule_{name}.mol")
output_fname = os.path.join(current_dir, "cell2mol.out")
Expand Down

0 comments on commit 2c899a9

Please sign in to comment.