Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make surface molecules from smiles using Argon workaround #2607

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion rmgpy/molecule/adjlist.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,9 @@ def check_partial_charge(atom):
the theoretical one:

"""
if atom.symbol in {'X','L','R'}:
# if atom.symbol in {'X','L','R'} or 'X' in [z.label for z in get_atomtype(atom, atom.bonds).generic]:
# TODO handle this in a more generic way so we don't have to add more metals here
if atom.symbol in {'X','L','R','Pt'}:
return # because we can't check it.

valence = PeriodicSystem.valence_electrons[atom.symbol]
Expand Down
27 changes: 21 additions & 6 deletions rmgpy/molecule/atomtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ def get_features(self):
'Cl','Cl1s',
'Br','Br1s',
'I','I1s',
'F','F1s','X','Xv','Xo'])
'F','F1s','X','Xv','Xo','Pt','Ptv','Pto'])

ATOMTYPES['Rx!H'] = AtomType(label='Rx!H', generic=['Rx'], specific=[
'R!H',
Expand All @@ -273,20 +273,29 @@ def get_features(self):
'Cl','Cl1s',
'Br','Br1s',
'I','I1s',
'F','F1s','X','Xv','Xo'])
'F','F1s','X','Xv','Xo','Pt','Ptv','Pto'])

# Surface sites:
ATOMTYPES['X'] = AtomType(label='X', generic=['Rx', 'Rx!H'], specific=['Xv', 'Xo'])
ATOMTYPES['X'] = AtomType(label='X', generic=['Rx', 'Rx!H'], specific=['Xv', 'Xo', 'Pt'])

# Vacant surface site:
ATOMTYPES['Xv'] = AtomType('Xv', generic=['X','Rx', 'Rx!H'], specific=[],
ATOMTYPES['Xv'] = AtomType('Xv', generic=['X','Rx', 'Rx!H'], specific=['Ptv'],
single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0],
benzene=[0], lone_pairs=[0])
# Occupied surface site:
ATOMTYPES['Xo'] = AtomType('Xo', generic=['X','Rx', 'Rx!H'], specific=[],
ATOMTYPES['Xo'] = AtomType('Xo', generic=['X','Rx', 'Rx!H'], specific=['Pto'],
single=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], all_double=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], r_double=[], o_double=[], s_double=[], triple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
quadruple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], benzene=[0], lone_pairs=[0])

ATOMTYPES['Pt'] = AtomType(label='Pt', generic=['Rx', 'Rx!H', 'X'], specific=['Ptv', 'Pto'])
# single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0],
# benzene=[0], lone_pairs=[0])
ATOMTYPES['Ptv'] = AtomType(label='Ptv', generic=['Rx', 'Rx!H', 'X', 'Pt', 'Xv'], specific=[],
single=[0], all_double=[0], r_double=[], o_double=[], s_double=[], triple=[0], quadruple=[0],
benzene=[0], lone_pairs=[0])
ATOMTYPES['Pto'] = AtomType(label='Pto', generic=['Rx', 'Rx!H', 'X', 'Pt', 'Xo'], specific=[],
single=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], all_double=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], r_double=[], o_double=[], s_double=[], triple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
quadruple=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18], benzene=[0], lone_pairs=[0])

# Non-surface atomTypes, R being the most generic:
ATOMTYPES['R'] = AtomType(label='R', generic=['Rx'], specific=[
Expand Down Expand Up @@ -675,6 +684,12 @@ def get_features(self):
ATOMTYPES['X'].set_actions(increment_bond=['X'], decrement_bond=['X'], form_bond=['X'], break_bond=['X'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[])
ATOMTYPES['Xv'].set_actions(increment_bond=[], decrement_bond=[], form_bond=['Xo'], break_bond=[], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[])
ATOMTYPES['Xo'].set_actions(increment_bond=['Xo'], decrement_bond=['Xo'], form_bond=[], break_bond=['Xv'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[])
ATOMTYPES['Pt'].set_actions(increment_bond=['Pt'], decrement_bond=['Pt'], form_bond=['Pt'], break_bond=['Pt'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[])
ATOMTYPES['Ptv'].set_actions(increment_bond=[], decrement_bond=[], form_bond=['Pto'], break_bond=[], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[])
ATOMTYPES['Pto'].set_actions(increment_bond=['Pto'], decrement_bond=['Pto'], form_bond=[], break_bond=['Ptv'], increment_radical=[], decrement_radical=[], increment_lone_pair=[], decrement_lone_pair=[])




ATOMTYPES['R'].set_actions(increment_bond=['R'], decrement_bond=['R'], form_bond=['R'], break_bond=['R'], increment_radical=['R'], decrement_radical=['R'], increment_lone_pair=['R'], decrement_lone_pair=['R'])
ATOMTYPES['R!H'].set_actions(increment_bond=['R!H'], decrement_bond=['R!H'], form_bond=['R!H'], break_bond=['R!H'], increment_radical=['R!H'], decrement_radical=['R!H'], increment_lone_pair=['R!H'], decrement_lone_pair=['R!H'])
Expand Down Expand Up @@ -812,7 +827,7 @@ def get_features(self):
ATOMTYPES['F1s'].set_actions(increment_bond=[], decrement_bond=[], form_bond=['F1s'], break_bond=['F1s'], increment_radical=['F1s'], decrement_radical=['F1s'], increment_lone_pair=[], decrement_lone_pair=[])

# these are ordered in priority of picking if a more general atomtype is encountered
allElements = ['H', 'C', 'O', 'N', 'S', 'P', 'Si', 'F', 'Cl', 'Br', 'I', 'Ne', 'Ar', 'He', 'X']
allElements = ['H', 'C', 'O', 'N', 'S', 'P', 'Si', 'F', 'Cl', 'Br', 'I', 'Ne', 'Ar', 'He', 'X', 'Pt']
# list of elements that do not have more specific atomTypes
nonSpecifics = ['H', 'He', 'Ne', 'Ar',]

Expand Down
8 changes: 4 additions & 4 deletions rmgpy/molecule/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,13 +123,13 @@ class PeriodicSystem(object):
isotopes of the same element may have slightly different electronegativities, which is not reflected below
"""
valences = {'H': 1, 'He': 0, 'C': 4, 'N': 3, 'O': 2, 'F': 1, 'Ne': 0,
'Si': 4, 'P': 3, 'S': 2, 'Cl': 1, 'Br': 1, 'Ar': 0, 'I': 1, 'X': 4}
'Si': 4, 'P': 3, 'S': 2, 'Cl': 1, 'Br': 1, 'Ar': 0, 'I': 1, 'X': 4, 'Pt': 4}
valence_electrons = {'H': 1, 'He': 2, 'C': 4, 'N': 5, 'O': 6, 'F': 7, 'Ne': 8,
'Si': 4, 'P': 5, 'S': 6, 'Cl': 7, 'Br': 7, 'Ar': 8, 'I': 7, 'X': 4}
'Si': 4, 'P': 5, 'S': 6, 'Cl': 7, 'Br': 7, 'Ar': 8, 'I': 7, 'X': 4, 'Pt': 4}
lone_pairs = {'H': 0, 'He': 1, 'C': 0, 'N': 1, 'O': 2, 'F': 3, 'Ne': 4,
'Si': 0, 'P': 1, 'S': 2, 'Cl': 3, 'Br': 3, 'Ar': 4, 'I': 3, 'X': 0}
'Si': 0, 'P': 1, 'S': 2, 'Cl': 3, 'Br': 3, 'Ar': 4, 'I': 3, 'X': 0, 'Pt': 0}
electronegativity = {'H': 2.20, 'D': 2.20, 'T': 2.20, 'C': 2.55, 'C13': 2.55, 'N': 3.04, 'O': 3.44, 'O18': 3.44,
'F': 3.98, 'Si': 1.90, 'P': 2.19, 'S': 2.58, 'Cl': 3.16, 'Br': 2.96, 'I': 2.66, 'X': 0.0}
'F': 3.98, 'Si': 1.90, 'P': 2.19, 'S': 2.58, 'Cl': 3.16, 'Br': 2.96, 'I': 2.66, 'X': 0.0, 'Pt': 0}


################################################################################
Expand Down
26 changes: 23 additions & 3 deletions rmgpy/molecule/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def is_surface_site(self):
"""
Return ``True`` if the atom represents a surface site or ``False`` if not.
"""
return self.symbol == 'X'
return self.symbol == 'X' or self.symbol in [z.label for z in ATOMTYPES['X'].specific]

def is_bonded_to_surface(self):
"""
Expand Down Expand Up @@ -984,7 +984,25 @@ def __init__(self, atoms=None, symmetry=-1, multiplicity=-187, reactive=True, pr
self.from_inchi(inchi)
self._inchi = inchi
elif smiles:
self.from_smiles(smiles)
for surface_site_symbol in ['X', 'Pt']:
if surface_site_symbol in smiles:
assert 'Ar' not in smiles
self.from_smiles(smiles.replace(surface_site_symbol, 'Ar'))
lines = self.to_adjacency_list().split('\n')
for i, line in enumerate(lines):
if 'Ar' in line: # The adjacency list needs to use the identified 'X' for a site
lines[i] = lines[i].replace('Ar', surface_site_symbol)
# remove any extra electron pairs
lines[i] = lines[i].replace('p3', 'p0')
lines[i] = lines[i].replace('p2', 'p0')
lines[i] = lines[i].replace('p1', 'p0')
adj_list = '\n'.join(lines)
self = self.from_adjacency_list(adj_list)
# but now we have to change the symbol back to 'Pt' or 'X' for the smiles
# self.smiles = self.smiles.replace('X', surface_site_symbol)
break
else:
self.from_smiles(smiles)
self._smiles = smiles

if multiplicity != -187: # it was set explicitly, so re-set it (from_smiles etc may have changed it)
Expand Down Expand Up @@ -1153,9 +1171,11 @@ def contains_surface_site(self):
Returns ``True`` iff the molecule contains a surface site (an 'X' atom or one of its more specific site types, such as 'Pt').
"""
cython.declare(atom=Atom)
cython.declare(z=AtomType)
for atom in self.atoms:
if atom.symbol == 'X':
if atom.symbol == 'X' or atom.symbol in [z.label for z in ATOMTYPES['X'].specific]:
return True
# atom_type = get_atomtype(atom, atom.bonds)
return False

def number_of_surface_sites(self):
Expand Down
7 changes: 7 additions & 0 deletions rmgpy/molecule/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,11 @@ def _rdkit_translator(input_object, identifier_type, mol=None):
output = from_rdkit_mol(mol, rdkitmol)
elif isinstance(input_object, mm.Molecule):
# We are converting from a molecule to a string identifier
generic_X = False # keep track of whether this is generic 'X' or specific 'Pt'
for atom in input_object.vertices:
if atom.element.symbol == 'X':
generic_X = True
break
if identifier_type == 'smi':
rdkitmol = to_rdkit_mol(input_object, sanitize=False)
else:
Expand All @@ -391,6 +396,8 @@ def _rdkit_translator(input_object, identifier_type, mol=None):
else:
raise ValueError('Unexpected input format. Should be a Molecule or a string.')

if generic_X:
output = output.replace('Pt', 'X')
return output


Expand Down
20 changes: 19 additions & 1 deletion test/rmgpy/molecule/moleculeTest.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def test_is_surface_site(self):
"""
for element in element_list:
atom = Atom(element=element, radical_electrons=0, charge=0, label="*1", lone_pairs=0)
if element.symbol == "X":
if element.symbol in ["X", "Pt"]:
assert atom.is_surface_site()
else:
assert not atom.is_surface_site()
Expand Down Expand Up @@ -1632,6 +1632,24 @@ def test_smiles(self):
"CCCC",
"O=C=O",
"[C]#N",
"[X]",
"[X]C=C[X]",
"O[X]",
"CO[X]",
"[XH]",
"C=C[X]",
"CO.[X]",
"C#[X]",
"CCC(C)[X]",
"[Pt]",
"[Pt]C=C[Pt]",
"O[Pt]",
"CO[Pt]",
"[PtH]",
"C=C[Pt]",
"CO.[Pt]",
"C#[Pt]",
"CCC(C)[Pt]"
]
for s in test_strings:
molecule = Molecule(smiles=s)
Expand Down
Loading