-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgraphBuild.py
executable file
·105 lines (83 loc) · 3.95 KB
/
graphBuild.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
#!/usr/bin/env python3
import argparse
from ChemTools import *
import datetime
import time
################################################################# BEGIN SCRIPT #####################################################################################
# Command-line interface: one positional seed SMILE plus optional flags that
# control graph depth, connection mode, naming, plotting and CSV export.
csBuilder = argparse.ArgumentParser(
    prog="CSGraphExplorer 1.0",
    description=(
        "a program that builds a local chemical space graph starting at a "
        "given seed molecule. Returns an HTML file containing a faerun "
        "visualization of the graph"
    ),
)

# The seed SMILE is the only positional (required) argument.
csBuilder.add_argument(
    "SMILE",
    type=str,
    help="the SMILE string of the molecule to seed the graph with",
)

# Everything below is optional and falls back to a default when omitted.
csBuilder.add_argument(
    "--depth",
    "-d",
    type=int,
    default=1,
    help="integer specifying depth of the graph. default is 1.",
)
csBuilder.add_argument(
    "--exhaustive_connections",
    "-ec",
    action="store_true",
    help="option to add exhaustive connections to the graph. default is false.",
)
csBuilder.add_argument(
    "--name",
    "-n",
    type=str,
    default=None,
    help=(
        "option to provide alternamtive name of the seed molecule "
        "(common name, abbreviation, etc). Default name of the seed is "
        "simply the SMILE string."
    ),
)
csBuilder.add_argument(
    "--plot",
    "-pl",
    action="store_true",
    help="option to plot graph. default is false",
)
csBuilder.add_argument(
    "--data",
    "-df",
    action="store_true",
    help=(
        "option to save graph nodes as smiles and molecular properties "
        "to csv. default is false"
    ),
)
# Read the command line once, then fan the parsed values out into the
# short module-level names used by the rest of the script.
args = csBuilder.parse_args()
seed, depth = args.SMILE, args.depth
cc, name = args.exhaustive_connections, args.name
plot, data = args.plot, args.data
####################### chatGPT ################
###### Build the graph ######
# Stamp the clock, build the graph from the seed, and keep the elapsed
# wall-clock seconds for the timing report at the end of the script.
start_time = time.time()
csg = buildGraph(seed, depth, cc)
buildGraph_time = time.time() - start_time

### Graph name: the user-supplied name wins, otherwise the seed SMILE is
### used; depth and the exhaustive-connections flag are appended as suffixes.
csg_name = f"{seed if name is None else name}_d{depth}_cc{cc}"
##### Plot the Graph #####
plot_stamp = time.time()
if plot:
    # Node labels and their matching fingerprints are both derived from the
    # graph and handed to the faerun plotting routine.
    node_labels, fps = get_node_labels_fps(csg)
    faerunPlot(csg, csg_name, node_labels, fps)
# Bound on every path, not only when plotting: the total-time computation
# later in the script reads faerunPlot_time unconditionally, so leaving it
# undefined without --plot would raise NameError. Without --plot this is
# just the (near-zero) time spent skipping the branch.
faerunPlot_time = time.time() - plot_stamp
####################### END chatGPT ################
data_stamp = time.time()
if data:
    ##### Save molecular properties data #####
    print("Retrieving smiles from cs graph")
    # node_labels is only created by the plotting step, so when --plot was
    # NOT given it has to be collected from the graph nodes here.
    if not plot:
        node_labels = list(csg.nodes())
    print("Retrieving molecular properties")
    # getPropList returns eight parallel sequences, one per property, in the
    # order unpacked below; each becomes one column of the data frame.
    NHD, NHA, MWT, MLP, MMR, NAT, PSA, qed = getPropList(node_labels)
    print("Saving data frame...")
    # Column header is "mwt" (the original key "mwt:" carried a stray colon
    # inside the quotes, producing a malformed CSV header).
    prop_cols = {
        "SMILE": node_labels,
        "nhd": NHD,
        "nha": NHA,
        "mwt": MWT,
        "mlp": MLP,
        "mmr": MMR,
        "nat": NAT,
        "psa": PSA,
        "qed": qed,
    }
    df = pd.DataFrame(prop_cols)
    # NOTE(review): the file name embeds the raw seed SMILE; SMILES may
    # contain '/' or '\\', which would be interpreted as path separators —
    # confirm whether seeds with stereo bonds need sanitising.
    fname = f"{seed}_d{depth}_ec{cc}.csv"
    df.to_csv(fname, index=False)
    print("Data frame successfuly saved as :", fname)
# Bound on every path so the total-time computation can always read it.
data_time = time.time() - data_stamp
if data:
    print("Total time for collecting and saving data:", reportTime(data_time))
##### Logging & Results ######
# One log file per (seed, depth, connection mode) combination.
log_file_name = f"log_{seed}_d{depth}_ec{cc}.txt"

# Total time for the entire program. The plot time only counts when plotting
# was requested; guarding on `plot` also avoids reading faerunPlot_time on a
# run where the plotting step never executed.
total_time = buildGraph_time + (faerunPlot_time if plot else 0.0) + data_time

# Opened in append mode so repeated runs accumulate in the same file (each
# entry starts with two blank lines); the context manager guarantees the
# handle is closed even if reportTime() or a write raises.
with open(log_file_name, "a") as f:
    f.write("\n\nBuildGraph time: " + reportTime(buildGraph_time) + "\n")
    if plot:
        f.write("faerunPlot time: " + reportTime(faerunPlot_time) + "\n")
    if data:
        f.write("save data time: " + reportTime(data_time) + "\n")
    print("Total Program Time:", reportTime(total_time))
    f.write("total time: " + reportTime(total_time) + "\n")