-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraph.py
209 lines (190 loc) · 7.58 KB
/
graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import networkx as nx
from random import shuffle
from random import random
import numpy as np
import math
from pubchempy import get_compounds, Compound
from RDkit.rdkit import Chem
from RDkit.rdkit.Chem import rdchem
from data import bond_types
def read_molecule_from_pubchem_id(pubchem_id: str) -> rdchem.Mol:
    '''
    Looks up a compound through pubchempy and parses its canonical SMILES.
    input: PubChem compound id (CID), handed unchanged to Compound.from_cid
    Output: Whatever read_molecule_mol returns for the compound's
            canonical SMILES, i.e. an RDKit molecule (not a networkx graph;
            the previous nx.Graph annotation did not match the body).
    '''
    compound = Compound.from_cid(pubchem_id)
    return read_molecule_mol(compound.canonical_smiles)
def mol_to_nx(mol: rdchem.Mol) -> nx.Graph:
    '''
    @Source: https://github.com/dakoner/keras-molecules/blob/dbbb790e74e406faa70b13e8be8104d9e938eba2/convert_rdkit_to_networkx.py
    Mirrors an RDKit molecule as a networkx graph: one node per atom, keyed
    by the atom index, and one edge per bond between the bond's begin and
    end atom indices. The values returned by the RDKit getters are stored
    as node / edge attributes.
    input: Chem.rdchem.Mol type object
    Output: networkx Graph of the molecule.
    '''
    graph = nx.Graph()

    for atom in mol.GetAtoms():
        atom_attrs = {
            'element': atom.GetSymbol(),
            'atomic_num': atom.GetAtomicNum(),
            'formal_charge': atom.GetFormalCharge(),
            'chiral_tag': atom.GetChiralTag(),
            'hybridization': atom.GetHybridization(),
            'num_explicit_hs': atom.GetNumExplicitHs(),
            'is_aromatic': atom.GetIsAromatic(),
        }
        graph.add_node(atom.GetIdx(), **atom_attrs)

    for bond in mol.GetBonds():
        bond_attrs = {
            'bond_type': bond.GetBondType(),
            'is_aromatic': bond.GetIsAromatic(),
            'conjugation': bond.GetIsConjugated(),
            'stereochemistry': bond.GetStereo(),
            'bond_dir': bond.GetBondDir(),
            'is_ring': bond.IsInRing(),
            # Ring-membership flags are kept in the same key order as before.
            'is_in_ring_size_5': bond.IsInRingSize(5),
            'is_in_ring_size_6': bond.IsInRingSize(6),
            'is_in_ring_size_3': bond.IsInRingSize(3),
            'is_in_ring_size_7': bond.IsInRingSize(7),
        }
        graph.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), **bond_attrs)

    return graph
def nx_to_mol(G: nx.Graph) -> rdchem.Mol:
    '''
    @source: https://github.com/dakoner/keras-molecules/blob/dbbb790e74e406faa70b13e8be8104d9e938eba2/convert_rdkit_to_networkx.py
    Rebuilds an RDKit molecule from a graph whose nodes carry the
    'atomic_num', 'chiral_tag', 'formal_charge', 'is_aromatic',
    'hybridization' and 'num_explicit_hs' attributes and whose edges carry
    'bond_type'. Other node / edge attributes are not read.
    input: A networkx graph representation of a molecule
    output: The editable molecule (Chem.RWMol) after Chem.SanitizeMol has
            been run on it.
    '''
    # Per-attribute lookup tables, each keyed by graph node.
    atomic_num_of = nx.get_node_attributes(G, 'atomic_num')
    chiral_tag_of = nx.get_node_attributes(G, 'chiral_tag')
    formal_charge_of = nx.get_node_attributes(G, 'formal_charge')
    is_aromatic_of = nx.get_node_attributes(G, 'is_aromatic')
    hybridization_of = nx.get_node_attributes(G, 'hybridization')
    explicit_hs_of = nx.get_node_attributes(G, 'num_explicit_hs')

    rw_mol = Chem.RWMol()

    # Graph node -> index that AddAtom assigned to the corresponding atom.
    atom_index = {}
    for node in G.nodes():
        atom = Chem.Atom(atomic_num_of[node])
        atom.SetChiralTag(chiral_tag_of[node])
        atom.SetFormalCharge(formal_charge_of[node])
        atom.SetIsAromatic(is_aromatic_of[node])
        atom.SetHybridization(hybridization_of[node])
        atom.SetNumExplicitHs(explicit_hs_of[node])
        atom_index[node] = rw_mol.AddAtom(atom)

    # Named so it does not hide the module-level `bond_types` import.
    edge_bond_types = nx.get_edge_attributes(G, 'bond_type')
    for begin, end in G.edges():
        rw_mol.AddBond(atom_index[begin], atom_index[end], edge_bond_types[begin, end])

    Chem.SanitizeMol(rw_mol)
    return rw_mol
def read_molecule_nx(smiles_rep: str) -> nx.Graph:
    '''
    Parses a SMILES string with RDKit and converts the result to a graph.
    input: SMILES representation of a molecule
    Output: The networkx graph built by mol_to_nx.
    NOTE(review): the parse result is passed to mol_to_nx unchecked;
    RDKit reportedly returns None for unparsable SMILES - confirm how
    callers want that case handled.
    '''
    return mol_to_nx(Chem.MolFromSmiles(smiles_rep))
def read_molecule_mol(smiles_rep: str) -> rdchem.Mol:
    '''
    Parses a SMILES string with RDKit.
    input: SMILES representation of a molecule
    Output: The object returned by Chem.MolFromSmiles, passed through as-is.
    '''
    return Chem.MolFromSmiles(smiles_rep)
def sequence_on_graph(G: nx.Graph):
    '''
    Draws a uniformly shuffled node order and, for every node in that
    order, a shuffled list of its edges to nodes that are already placed.
    input: A graph G of type nx.Graph() whose nodes carry 'atomic_num' and
           whose edges carry 'bond_type'
    Output: A tuple (node_ordering, edge_ordering):
            node_ordering - list of (node, atomic_num) in the drawn order
            edge_ordering - one list per node, in the same order, of
                            (node, adjacent_node, code) tuples, where code
                            is bond_types[str(<edge 'bond_type'>)]
    '''
    node_order = list(G.nodes())
    shuffle(node_order)

    placed = set()
    edge_ordering = []
    for current in node_order:
        # The node counts as placed before its neighbours are scanned, so a
        # self-loop on `current` is reported as well.
        placed.add(current)
        neighbours = G[current]
        back_edges = [
            (current, other, bond_types[str(neighbours[other]['bond_type'])])
            for other in neighbours
            if other in placed
        ]
        shuffle(back_edges)
        edge_ordering.append(back_edges)

    node_data = G.nodes(data=True)
    node_ordering = [(n, node_data[n]['atomic_num']) for n in node_order]
    return (node_ordering, edge_ordering)
def sequence_on_graph_geometric(G: nx.Graph):
    '''
    Draws a node order by repeatedly picking a position in the list of
    not-yet-chosen nodes with geometrically decaying weights, then builds
    the per-node edge lists the same way sequence_on_graph does.

    Position k (0-based) of the remaining list has weight 1/2**(k+1); the
    last position additionally receives the leftover mass so the weights
    always sum to 1. After every draw the weight list is shortened by one
    and its new last entry is doubled, which restores that property.

    input: A graph G of type nx.Graph() whose nodes carry 'atomic_num' and
           whose edges carry 'bond_type'
    Output: A tuple (node_ordering, edge_ordering, log_prob):
            node_ordering - list of (node, atomic_num) in the drawn order
            edge_ordering - one list per node, in the same order, of
                            (node, adjacent_node, code) tuples, where code
                            is bond_types[str(<edge 'bond_type'>)]
            log_prob      - sum of np.log(weight) over the node draws; the
                            edge shuffles do not contribute to it
    '''
    remaining = list(G.nodes())

    # Initial weights: 1/2, 1/4, ... with the tail mass folded into the
    # final slot (or a single weight of 1 when there is at most one node).
    weights = []
    weight = 1/2
    for _ in range(len(remaining) - 1):
        weights.append(weight)
        weight *= 1/2
    weights.append(weight * 2 if weights else 1)

    log_prob = 0.0
    picked_order = []
    while remaining:
        slot = np.random.choice(np.arange(len(remaining)), p=weights)
        log_prob += np.log(weights[slot])
        picked_order.append(remaining.pop(slot))
        # One position fewer next round: drop the tail weight and give its
        # mass to the new last position.
        weights.pop()
        if weights:
            weights[-1] *= 2

    placed = set()
    edge_ordering = []
    for current in picked_order:
        placed.add(current)
        neighbours = G[current]
        back_edges = [
            (current, other, bond_types[str(neighbours[other]['bond_type'])])
            for other in neighbours
            if other in placed
        ]
        shuffle(back_edges)
        edge_ordering.append(back_edges)

    node_data = G.nodes(data=True)
    node_ordering = [(n, node_data[n]['atomic_num']) for n in picked_order]
    return (node_ordering, edge_ordering, log_prob)
def construct_graph(node_ordering,edge_ordering) -> nx.Graph:
    '''
    Builds a graph from a node list and grouped edge lists.
    inputs:
    @params node_ordering: iterable of nodes, passed unchanged to
                           nx.Graph.add_nodes_from
    @params edge_ordering: iterable of edge groups; every edge of every
                           group is converted with tuple() and the
                           flattened list is passed to add_edges_from
    Outputs:
    A graph G of type nx.Graph()
    '''
    flat_edges = [tuple(edge) for group in edge_ordering for edge in group]

    graph = nx.Graph()
    graph.add_nodes_from(node_ordering)
    graph.add_edges_from(flat_edges)
    return graph
def valid_molecule(G: nx.Graph) -> bool :
    '''
    Checks the valency constraint of every carbon and oxygen node.

    For each node the implicit hydrogen count ('hcount') and the 'order' of
    every incident edge are summed and rounded up with math.ceil. For
    element 'C' that total minus the node's 'charge' must equal 4, for
    element 'O' it must equal 2. Nodes of any other element are not
    constrained (their 'hcount' and edge 'order' values are still read).

    input: A graph G of type nx.Graph() with node attributes 'element',
           'hcount', 'charge' and edge attribute 'order'
    Output: False as soon as one C or O node violates its valency,
            True otherwise (including for an empty graph).
    NOTE(review): these attribute names differ from the ones mol_to_nx
    writes ('atomic_num', 'formal_charge', 'bond_type'); presumably this
    expects graphs from a different SMILES reader - confirm with callers.
    '''
    expected_valence = {'C': 4, 'O': 2}
    for node, node_attr_dict in G.nodes(data=True):
        # G[node] maps neighbour -> edge attribute dict. Iterating it
        # directly yields only the neighbours, so the attribute dicts must
        # be taken from .values().
        count = sum(
            (edge_attr_dict['order'] for edge_attr_dict in G[node].values()),
            int(node_attr_dict['hcount']),
        )
        count = math.ceil(count)
        required = expected_valence.get(node_attr_dict['element'])
        if required is not None and count - node_attr_dict['charge'] != required:
            return False
    return True
def generate_graph_from_sequence(G: nx.Graph,node_ordering,edge_ordering) -> nx.Graph:
    '''
    Rebuilds a graph by replaying a node / edge sequence against G, copying
    the node and edge attributes from G.

    inputs:
    @params G: graph that supplies the attributes
    @params node_ordering: nodes in insertion order. An entry may be a node
                           of G, or a (node, value) pair such as the
                           (node, atomic_num) pairs returned by
                           sequence_on_graph; a pair is unwrapped only when
                           the pair itself is not a node of G but its first
                           element is.
    @params edge_ordering: edge_ordering[i] lists the edges to add right
                           after node_ordering[i]. Only the first two items
                           of each edge (its endpoints) are used, so both
                           (u, v) and (u, v, code) tuples are accepted.
    Outputs:
    A new graph of type nx.Graph()
    Raises:
    KeyError if a node or edge of the sequence does not exist in G,
    IndexError if edge_ordering is shorter than node_ordering.
    '''
    rebuilt = nx.Graph()
    for i, entry in enumerate(node_ordering):
        node = entry
        if (
            node not in G
            and isinstance(entry, tuple)
            and len(entry) == 2
            and entry[0] in G
        ):
            node = entry[0]
        rebuilt.add_nodes_from([(node, G.nodes[node])])
        for edge in edge_ordering[i]:
            u, v = edge[0], edge[1]
            # Look the edge up by its endpoints: indexing G.edges with a
            # 3-item edge tuple fails on a plain Graph.
            rebuilt.add_edges_from([(u, v, G.edges[u, v])])
    return rebuilt