-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
161 lines (123 loc) · 4.2 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
"""
Main file aggregator that reads a Python config file and runs each script accordingly.
Running with no argument takes the template config.py.
>>> python3 main.py
Running with an argument searches for the config name.
>>> python3 main.py "config-qm7drugs"
"""
# %%
import importlib
import sys
import time
from datetime import datetime

import pandas as pd
# Read the run configuration.
if len(sys.argv) > 1:
    # The first command-line argument names the config module to load,
    # e.g. "config-drugs" or "config-qm7fragments". importlib.import_module
    # returns the named module itself, whereas __import__ returns the
    # top-level package when given a dotted name.
    config = importlib.import_module(sys.argv[1]).config
else:
    # No argument: fall back to the default file config.py.
    from config import config

target_names = config["target_names"]
config_name = config["config_name"]
print(f"Config name {config_name}")
print(f"Read {len(target_names)} target(s): {target_names}")

# The names below are not referenced again in this file; the lookups still
# raise KeyError early if the config is missing one of these keys.
database = config["database"]
representation = config["representation"]
repository_folder = config["repository_folder"]
# NOTE(review): current_folder reads the same key as repository_folder --
# confirm this is intended.
current_folder = config["repository_folder"]
size_subset = config["learning_curve_ticks"][-1]

# Timings dump: one CSV per config name and calendar day, seeded with the
# number of targets and their names.
# NOTE(review): the path is built by plain string concatenation, so
# repository_folder is presumably expected to end with a path separator.
current_time = datetime.now().strftime("%Y-%m-%d")
DUMP_PATH = f"{repository_folder}run/dump-{config_name}-{current_time}.csv"
dump = pd.DataFrame(
    {"Property": ["num_targets", "targets"], "Value": [len(target_names), target_names]}
)
dump.to_csv(DUMP_PATH)
# Concatenate-and-save helper for the timings dump.
def add_onto_and_save(df, prop, value, path=None):
    """Append one Property/Value row to ``df``, write the result to CSV, return it.

    Args:
        df: DataFrame to extend; expected to carry "Property" and "Value"
            columns.
        prop: Label stored in the "Property" column of the new row.
        value: Entry stored in the "Value" column of the new row.
        path: CSV file to (over)write. Defaults to the module-level
            ``DUMP_PATH``.

    Returns:
        A new DataFrame with the extra row appended; ``df`` itself is not
        modified.
    """
    if path is None:
        # Resolved at call time. Reading a module-level name needs no
        # ``global`` statement.
        path = DUMP_PATH
    new_row = pd.DataFrame({"Property": [prop], "Value": [value]})
    # ignore_index renumbers the rows; without it every appended row keeps
    # index label 0 and the index column of the saved CSV repeats.
    df = pd.concat([df, new_row], ignore_index=True)
    df.to_csv(path)
    return df
# %%
# Generate representations: each enabled stage is imported lazily, run on the
# config, and its wall-clock duration is appended to the timings dump.
if config["generate_database"]:
    from scripts.generate import generate_database

    started = time.time()
    generate_database(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_generate_database", timer)

if config["generate_targets"]:
    from scripts.generate import generate_targets

    started = time.time()
    generate_targets(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_generate_targets", timer)
# %%
# Generate the cur and fps subsets (each only when enabled in the config) and
# record how long each stage took.
if config["cur_subset"]:
    from scripts.cur_subset import cur_subset

    started = time.time()
    cur_subset(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_cur_subset", timer)

if config["fps_subset"]:
    from scripts.fps_subset import fps_subset

    started = time.time()
    fps_subset(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_fps_subset", timer)
# %%
# Generate the sml subset when enabled, timing the stage for the dump.
if config["sml_subset"]:
    from scripts.sml_subset import sml_subset

    started = time.time()
    sml_subset(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_sml_subset", timer)
# %%
# Generate the algo model when enabled, timing the stage for the dump.
if config["algo_model"]:
    from scripts.algo_model import algo_model

    started = time.time()
    algo_model(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_algo_model", timer)
# %% generate algo subset
# Runs only when enabled in the config; the elapsed time goes into the dump.
if config["algo_subset"]:
    from scripts.algo_subset import algo_subset

    started = time.time()
    algo_subset(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_algo_subset", timer)
# %%
# Generate learning curves for every requested entry other than "random";
# the stage is skipped when no such entry is configured.
no_random_curves = [
    curve for curve in config["learning_curves"] if curve != "random"
]
if no_random_curves:
    from scripts.learning_curves import learning_curves

    started = time.time()
    learning_curves(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_learning_curves", timer)
# %%
# Generate the "random" learning curves when they are requested in the config.
if "random" in config["learning_curves"]:
    from scripts.learning_curves import learning_curves_random

    started = time.time()
    learning_curves_random(config, add_onto_old=False)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_learning_curves_random", timer)
# %%
# Draw learning curves: each plotting stage runs only when its config entry
# is non-empty, and its duration is appended to the timings dump.
if len(config["plots_individual"]) > 0:
    from scripts.plots import plots_individual

    started = time.time()
    plots_individual(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_plots_individual", timer)

if len(config["plots_average"]) > 0:
    from scripts.plots import plots_average

    started = time.time()
    plots_average(config)
    timer = time.time() - started
    dump = add_onto_and_save(dump, "time_plots_average", timer)
# %%