# deepchem_main.py
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

import deepchem as dc

from fnn import FeedforwardNeuralNetwork
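
# fnn.py is not shown in this file. For reference, a minimal sketch of a
# compatible model, assuming a single hidden layer with a ReLU activation
# and one regression output (hypothetical; the actual class in fnn.py may
# differ):
#
#     class FeedforwardNeuralNetwork(nn.Module):
#         def __init__(self, in_dim, hidden_dim):
#             super().__init__()
#             self.layers = nn.Sequential(nn.Linear(in_dim, hidden_dim),
#                                         nn.ReLU(),
#                                         nn.Linear(hidden_dim, 1))
#
#         def forward(self, x):
#             return self.layers(x)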

# Fix the random seed so weight initialization and shuffling are
# reproducible.
torch.manual_seed(43)


def loss_evaluator(dataloader, model, loss_func):
    """Return the average per-sample loss over a dataloader, together
    with the flattened predictions and targets."""
    sample_size = len(dataloader.dataset)
    pred_list = []
    true_list = []
    with torch.no_grad():
        loss = 0
        for each_X, each_y in dataloader:
            each_pred = model(each_X)
            pred_list.extend(each_pred.tolist())
            true_list.extend(each_y.tolist())
            loss += loss_func(each_pred, each_y).item()
    return loss / sample_size, pred_list, true_list
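
# Note: the model here is a plain feedforward network; if it ever gains
# dropout or batch normalization, wrap loss_evaluator calls in
# model.eval() / model.train() so evaluation is deterministic.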

# Featurize the BACE regression dataset (normalized pIC50 targets) with
# RDKit molecular descriptors.
featurizer = dc.feat.RDKitDescriptors()
tasks, datasets, transformers \
    = dc.molnet.load_bace_regression(featurizer)
train_set, val_set, test_set = datasets
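
# The MoleculeNet loader normalizes y by default, so losses and plots
# below are on the normalized pIC50 scale; `transformers` holds the
# fitted transformers needed to undo this (see the sketch at the end of
# the file).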

# Wrap the DeepChem NumPy arrays in PyTorch dataloaders; only the
# training set needs shuffling.
train_dataloader = DataLoader(
    TensorDataset(torch.FloatTensor(train_set.X),
                  torch.FloatTensor(train_set.y)),
    batch_size=32, shuffle=True)
val_dataloader = DataLoader(
    TensorDataset(torch.FloatTensor(val_set.X),
                  torch.FloatTensor(val_set.y)),
    batch_size=32, shuffle=False)
test_dataloader = DataLoader(
    TensorDataset(torch.FloatTensor(test_set.X),
                  torch.FloatTensor(test_set.y)),
    batch_size=32, shuffle=False)

# One hidden layer of width 32. The summed MSE pairs with the division
# by dataset size in loss_evaluator; weight_decay adds L2 regularization.
model = FeedforwardNeuralNetwork(in_dim=train_set.X.shape[1],
                                 hidden_dim=32)
loss_func = nn.MSELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(),
                             lr=1e-3,
                             weight_decay=1e-5)

# Train for 30 epochs, recording train/validation loss every 10 updates
# and printing every 100.
n_step = 0
train_loss_list = []
val_loss_list = []
for each_epoch in range(30):
    for each_X, each_y in train_dataloader:
        if n_step % 10 == 0:
            train_loss, _, _ = loss_evaluator(train_dataloader,
                                              model,
                                              loss_func)
            val_loss, _, _ = loss_evaluator(val_dataloader,
                                            model,
                                            loss_func)
            if n_step % 100 == 0:
                print('step: {},\t\ttrain loss: {}'.format(
                    n_step, train_loss))
                print('step: {},\t\tval loss: {}'.format(
                    n_step, val_loss))
            train_loss_list.append((n_step, train_loss))
            val_loss_list.append((n_step, val_loss))
        each_pred = model(each_X)
        loss = loss_func(each_pred, each_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        n_step += 1

# Plot the learning curve (log-scale loss vs. number of updates).
fig, ax = plt.subplots(1, 1)
ax.plot(*zip(*train_loss_list), marker='+', label='train')
ax.plot(*zip(*val_loss_list), marker='.', label='validation')
ax.legend()
ax.set_title('Learning curve')
ax.set_xlabel('# of updates')
ax.set_ylabel('Loss')
ax.set_yscale('log')
plt.savefig('bace_loss.pdf')
plt.clf()

# Evaluate on the held-out test set and plot predicted vs. true values.
test_loss, pred_list, true_list = loss_evaluator(
    test_dataloader, model, loss_func)
print('test_loss: {}'.format(test_loss))
fig, ax = plt.subplots(1, 1)
ax.scatter(pred_list, true_list)
ax.set_title('Test loss = {}'.format(test_loss))
ax.set_xlabel('Predicted normalized pIC50')
ax.set_ylabel('True normalized pIC50')
plt.savefig('bace_scatter.pdf')
plt.clf()
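
# The losses and plots above are on the normalized scale. A sketch of
# mapping the test predictions back to raw pIC50 with DeepChem's
# transformer utilities (assuming the default normalization transformers
# returned by load_bace_regression; verify against your DeepChem
# version):
import numpy as np  # imported here to keep the sketch self-contained

raw_pred = dc.trans.undo_transforms(np.array(pred_list), transformers)
raw_true = dc.trans.undo_transforms(np.array(true_list), transformers)
print('first raw pIC50 predictions: {}'.format(raw_pred[:5].flatten()))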