-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpandas_pybrain.py
92 lines (72 loc) · 2.97 KB
/
pandas_pybrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 07 23:35:15 2015
@author: leo
"""
import pandas as pd
from pybrain.datasets import SupervisedDataSet
from pybrain.tools.shortcuts import buildNetwork
from pybrain.supervised.trainers import BackpropTrainer
from pybrain.structure.modules import SigmoidLayer
from pybrain.structure.modules import TanhLayer
def make_pybrain_ds(table, prediction_cols, to_predict, normalise = True):
    """Build a pybrain ``SupervisedDataSet`` from a pandas data frame.

    Parameters
    ----------
    table : pandas.DataFrame
        Source rows; must contain every column named in ``prediction_cols``
        and ``to_predict``.
    prediction_cols : list
        Names of the columns used as network inputs.
    to_predict : column label or list of column labels
        Target column(s).  A single label (e.g. a string) is treated as one
        target column.
    normalise : bool
        When true, each input column is centred on its mean and divided by
        its range (max - min), computed on ``table`` itself.  Target columns
        are never rescaled.

    Returns
    -------
    SupervisedDataSet
        One sample per row of ``table``.
    """
    print('Creating dataset...')

    prediction_cols = list(prediction_cols)
    # A bare label must become a one-element list: otherwise len() would
    # count the characters of the name and tuple() would fail on a scalar.
    if isinstance(to_predict, (list, tuple, pd.Index)):
        target_cols = list(to_predict)
    else:
        target_cols = [to_predict]

    # Inputs and targets are kept apart so that normalising the inputs does
    # not drop the target columns.
    inputs = table[prediction_cols]
    targets = table[target_cols]

    if normalise:
        spread = inputs.max() - inputs.min()
        # A constant column has zero range; dividing by 1 instead maps it to
        # 0 rather than filling it with NaN (0 / 0).
        spread = spread.where(spread != 0, 1)
        inputs = (inputs - inputs.mean()) / spread

    ds = SupervisedDataSet(len(prediction_cols), len(target_cols))
    for input_row, target_row in zip(inputs.values, targets.values):
        ds.addSample(tuple(input_row), tuple(target_row))

    print('Dataset created')
    return ds
def nn_predict(train, test, prediction_cols, to_predict,
               n_nodes,
               hiddenclass,
               learningrate,
               num_epochs,
               verbose = True):
    """Train a one-hidden-layer network on ``train`` and predict on ``test``.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Training and evaluation rows; both must contain ``prediction_cols``
        and the ``to_predict`` column.
    prediction_cols : list
        Names of the input columns.
    to_predict : str
        Name of the single target column.
    n_nodes : int
        Number of units in the hidden layer.
    hiddenclass : str or layer class
        ``'SigmoidLayer'`` or ``'TanhLayer'``, or a layer class object.
    learningrate : float
        Learning rate handed to ``BackpropTrainer``.
    num_epochs : int
        Number of calls to ``trainer.train()``.
    verbose : bool
        Print the run settings before training.

    Returns
    -------
    tuple
        ``(trainer, test)``.  ``test`` is the caller's frame, modified in
        place: a ``<to_predict>_predict`` column holds the network output.

    Raises
    ------
    ValueError
        If ``hiddenclass`` is neither a known layer name nor a callable.
    """
    # Resolve the layer by explicit lookup instead of eval() on the string.
    layer_classes = {'SigmoidLayer': SigmoidLayer, 'TanhLayer': TanhLayer}
    if hiddenclass in layer_classes:
        hidden_layer = layer_classes[hiddenclass]
    elif callable(hiddenclass):
        hidden_layer = hiddenclass
    else:
        raise ValueError('Unknown hiddenclass: %r (expected one of %s)'
                         % (hiddenclass, sorted(layer_classes)))

    # The row filter has to run before the training dataset is built;
    # applied afterwards it would not influence training at all.
    if to_predict == 'place_geny':
        train = train[train.is_place]

    # The target is passed as a one-element list so the dataset gets exactly
    # one output dimension.
    ds = make_pybrain_ds(train, prediction_cols, [to_predict])
    ds_test = make_pybrain_ds(test, prediction_cols, [to_predict])

    net = buildNetwork(ds.indim, n_nodes, ds.outdim,
                       bias=True, hiddenclass=hidden_layer)
    trainer = BackpropTrainer(net, dataset=ds, learningrate=learningrate,
                              lrdecay=1.0, momentum=0.0, verbose=False,
                              batchlearning=False, weightdecay=0.0)

    if verbose:
        print('XXXXXXXXXXXXXXXXXXXXXXXXXX')
        print('Predicting : %s' % (to_predict,))
        print('n_nodes : %s' % (n_nodes,))
        print('Layer : %s' % (hiddenclass,))
        print('learningrate : %s' % (learningrate,))

    for _ in range(num_epochs):
        trainer.train()

    predicted_col = to_predict + '_predict'
    predictions = pd.DataFrame(net.activateOnDataset(ds_test),
                               index=test.index,
                               columns=[predicted_col])
    # Writes into the caller's frame; the same object is returned.
    test[predicted_col] = predictions[predicted_col]

    return (trainer, test)
# Network
#n_nodes_1 = 14 #
#n_nodes_2 = None # None if no second level
#
#hiddenclass = 'SigmoidLayer'
#learningrate = 0.02
#num_epochs = 3
#verbose = True
#test_copy = test.copy()
#train_copy = train.copy()
#for col in pour_predire_cols:
# train[col] = (train[col] - train[col].mean()) / train[col].std()
# test[col] = (test[col] - test[col].mean()) / test[col].std()
if __name__ == '__main__':
    # NOTE(review): ``train``, ``test`` and ``pour_predire_cols`` are not
    # defined anywhere in this file; presumably they are expected to exist
    # already in the interactive session that runs it -- confirm or load
    # them here before this point.
    print('len pour_predire_cols %s' % (len(pour_predire_cols),))

    # nn_predict takes a single ``n_nodes`` argument; the former
    # ``n_nodes_1`` / ``n_nodes_2`` keywords are not part of its signature.
    test_1 = nn_predict(train, test, pour_predire_cols,
                        to_predict='is_place',
                        n_nodes=20,
                        hiddenclass='SigmoidLayer',
                        learningrate=0.05,
                        num_epochs=5)