-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathGraph.py
208 lines (182 loc) · 8.68 KB
/
Graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import re
import pandas as pd
import numpy as np
from itertools import combinations
from networkx import DiGraph
from network2tikz import plot
from rdkit import Chem
from rdkit.Chem import PandasTools
import matplotlib.pyplot as plt
import networkx as nx
def graph_from_df(input_file, transitions, figname="network.png"):
"""Return a DiGraph from Pandas DataFrame."""
from networkx import DiGraph
pos = {}
Hs = input_file["nHydrogens"]
for i in range(max(Hs)+1):
m = np.where(i == Hs)[0]
if len(m) == 1: matches = [0]
else: matches = np.linspace(-len(m), len(m), len(m))/len(m)
for x,k in enumerate(m):
key = str(input_file["microstate ID"][k].split("micro")[-1])
pos[key] = (Hs[k], matches[x])
from_list, to_list = transitions.transpose()
from_list = [str(input_file["microstate ID"][i].split("micro")[-1]) for i in from_list]
to_list = [str(input_file["microstate ID"][i].split("micro")[-1]) for i in to_list]
convert = lambda txt: int(txt) if txt.isdigit() else txt
micros = sorted(set(np.concatenate([to_list, from_list])), key=lambda x:[convert(s) for s in re.split("([0-9]+)",x)])
# Direction of edges of the graph is deprotonated -> protonated state
graph = DiGraph()
#graph.add_edges_from(zip(from_list, to_list, properties))
graph.add_edges_from(zip(from_list, to_list))
#NOTE: Get Network of states
fig, (ax) = plt.subplots(1)
fig.set_figheight(10);fig.set_figwidth(10)
nx.draw(graph, pos, with_labels=True, arrows=False, ax=ax, font_weight='bold')
#plt.axis('equal', adjustable='datalim')
ax.set_aspect(aspect=.9, adjustable='box', anchor='C', share=False)
fig.savefig(figname, bbox_inches='tight', dpi=100)
def tikz_graph_from_df(input_file, transitions, out="network.tex"):
"""Return a DiGraph from Pandas DataFrame.
https://github.com/hackl/network2tikz
https://github.com/hackl/network2tikz/blob/master/examples/ex_networkx.py
"""
import networkx as nx
#from networkx import DiGraph
PandasTools.AddMoleculeColumnToFrame(input_file, smilesCol='smiles', molCol='molecule')
pos = {}
Hs = input_file["nHydrogens"]
for i in range(max(Hs)+1):
m = np.where(i == Hs)[0]
if len(m) == 1: matches = [0]
else: matches = np.linspace(-len(m), len(m), len(m))/len(m)
for x,k in enumerate(m):
key = str(input_file["microstate ID"][k].split("micro")[-1])
pos[key] = (Hs[k], matches[x])
from_list, to_list = transitions.transpose()
from_list = [str(input_file["microstate ID"][i].split("micro")[-1]) for i in from_list]
to_list = [str(input_file["microstate ID"][i].split("micro")[-1]) for i in to_list]
convert = lambda txt: int(txt) if txt.isdigit() else txt
micros = sorted(set(np.concatenate([to_list, from_list])), key=lambda x:[convert(s) for s in re.split("([0-9]+)",x)])
# Direction of edges of the graph is deprotonated -> protonated state
graph = nx.DiGraph(directed=True)
for i,name in enumerate([ID.split("micro")[-1] for ID in list(input_file["microstate ID"])]):
graph.add_node(name, name=name, smiles=list(input_file["smiles"])[i])
nodes = nx.get_node_attributes(graph, 'name')
for node in zip(from_list, to_list):
graph.add_edge(node[0],node[1])
style = {}
# TODO: Continue adding the style that you want for the LaTeX Network
style['edge_directed'] = {edge:True for edge in graph.edges()}
style['edge_directed'] = {edge:True for edge in graph.edges()}
# general options
style['unit'] = 'mm'
style['layout'] = layout
style["margin"] = {'top':5,'bottom':8,'left':5,'right':5}
style["canvas"] = (100,60)
style['keep_aspect_ratio'] = False
plot(graph, out, **style)
def DrawMolZoomed(smiles, legend=None, subImgSize=(100, 100)):
from PIL import Image
from io import BytesIO
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem import Draw
import matplotlib.pyplot as plt
mol = mol = Chem.MolFromSmiles(smiles)
size = (subImgSize[0], subImgSize[1])
full_image = Image.new('RGBA', size)
d2d = rdMolDraw2D.MolDraw2DCairo(subImgSize[0], subImgSize[1])
opts = d2d.drawOptions()
opts.legendFontSize = 20
d2d.SetFontSize(1)
d2d.DrawMolecule(mol, legend)
d2d.FinishDrawing()
sub = Image.open(BytesIO(d2d.GetDrawingText()))
full_image.paste(sub)
return full_image
def graph_from_df_(input_file, transitions_file, column=4, results_file=None,
molSize=0.15, subImgSize=(100,100), figsize=(10,16),
edge_label_font_size=16, figname="network.png"):
"""Return a DiGraph from Pandas DataFrame.
Args:
input_file(str) - path to file that contains information about the nodes
transitions_file(str) - path to numpy file with: array(array(2)...array(2))
node-to-node transitions
column(int) - the column (0-indexing) from `input_file` to be used for
"""
import networkx as nx
import re
import numpy as np
from itertools import combinations
from networkx import DiGraph
from network2tikz import plot
from rdkit import Chem
from rdkit.Chem import PandasTools
import matplotlib.pyplot as plt
import networkx as nx
#from networkx import DiGraph
# load numpy of transitions from node to node
transitions = np.load(transitions_file)
# load csv file of informations regarding each node
input_file = pd.read_csv(input_file, index_col=False)
# argument to function is the number of the column you wish to use to organize
# the nodes in. e.g., Organize nodes by the number of ionizable hydrogens
key = list(input_file.keys())[column]
pos = {} # gather the positions of each node
Hs = input_file[key]
for i,q in enumerate(Hs):
m = np.where(q == Hs)[0]
if len(m) == 1: matches = [0]
else: matches = np.linspace(-len(m), len(m), len(m))/len(m)
for x,k in enumerate(m):
key = str(list(input_file["microstate ID"])[k].split("micro")[-1])
pos[key] = (-list(Hs)[k], matches[x])
# transitions from --> to (lists)
from_list, to_list = transitions.transpose()
from_list = [str(list(input_file["microstate ID"])[i].split("micro")[-1]) for i in from_list]
to_list = [str(list(input_file["microstate ID"])[i].split("micro")[-1]) for i in to_list]
convert = lambda txt: int(txt) if txt.isdigit() else txt
micros = sorted(set(np.concatenate([to_list, from_list])), key=lambda x:[convert(s) for s in re.split("([0-9]+)",x)])
#NOTE: Using DiGraph from networkx
# Direction of edges of the graph is deprotonated -> protonated state
graph = nx.DiGraph(directed=True)
for i,name in enumerate([ID.split("micro")[-1] for ID in list(input_file["microstate ID"])]):
graph.add_node(name, name=name, smiles=list(input_file["smiles"])[i])
nodes = nx.get_node_attributes(graph, 'name')
if results_file:
results = pd.read_pickle(results_file)
#Gs = [f'{G:.2f}±{list(results["G std"])[i]:.2f}' for i,G in enumerate(list(results["G"]))]
Gs = [f'{G:.2f}±{list(results["posterior_std"])[i]:.2f}' for i,G in enumerate(list(results["posterior"]))]
labels = {}
for i,node in enumerate(zip(from_list, to_list)):
graph.add_edge(node[0]*scale,node[1]*scale, label=Gs[i])
labels[(node[0]*scale,node[1])*scale] = Gs[i]
else:
for i,node in enumerate(zip(from_list, to_list)):
graph.add_edge(node[0],node[1])
# Put the network inside a matplotlib plot image
#NOTE: Get Network of states
fig, (ax) = plt.subplots(1)
fig.set_figheight(figsize[0]);fig.set_figwidth(figsize[1])
nx.draw(graph, pos)#, with_labels=False, arrows=False, ax=ax)#, font_weight='bold')
if results_file: nx.draw_networkx_edge_labels(graph, pos, edge_labels=labels,
font_size=edge_label_font_size, label_pos=0.6, arrows=True)
trans=ax.transData.transform
trans2=fig.transFigure.inverted().transform
piesize=molSize # this is the image size
p2=piesize/2.0
# NOTE: Changing each of the nodes to have an image of the molecule instead of a circle
for node in graph:
xx,yy=trans(pos[node]) # figure coordinates
xa,ya=trans2((xx,yy)) # axes coordinates
a = plt.axes([xa-p2,ya-p2, piesize, piesize])
smiles = graph.nodes[node]['smiles']
ID = graph.nodes[node]['name']
img = DrawMolZoomed(smiles, subImgSize=subImgSize)#, legend=ID)
a.imshow(img)
a.axis('off')
# TODO: make the title the G value
a.set_title(graph.nodes[node]['name'], fontweight='bold', size=12)
fig.savefig(figname, bbox_inches='tight', dpi=100)
return graph,pos